Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/38888b46c342
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 00:11:13 2012
Кодировка:
allpy: 38888b46c342

allpy

changeset 324:38888b46c342

Added usecases from wiki to test/ directory
author Daniil Alexeyevsky <me.dendik@gmail.com>
date Fri, 17 Dec 2010 00:45:20 +0300
parents 2a6c2cffa891
children 6fdc4406e2fd
files test/usecase1.py test/usecase2.py test/usecase3.py
diffstat 3 files changed, 143 insertions(+), 0 deletions(-) [+]
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/test/usecase1.py	Fri Dec 17 00:45:20 2010 +0300
     1.3 @@ -0,0 +1,35 @@
     1.4 +from allpy import protein
     1.5 +
     1.6 +# Create sequences from string representation of sequence body
     1.7 +sequence_1 = allpy.protein.Sequence.from_string("mkstf", name="E2E4")
     1.8 +sequence_2 = allpy.protein.Sequence.from_string("mstkfff")
     1.9 +
    1.10 +# Create alignment from sequences
    1.11 +alignment = protein.Alignment()
    1.12 +alignment.append_sequence(sequence_1)
    1.13 +alignment.append_sequence(sequence_2)
    1.14 +alignment.realign("muscle")
    1.15 +
    1.16 +# For each sequence, print number of gaps and non-gaps in alignment
    1.17 +for row in alignment.rows():
    1.18 +    gaps = 0
    1.19 +    monomers = 0
    1.20 +    for column in alignment.columns:
    1.21 +        if column in row:
    1.22 +            monomers += 1
    1.23 +        else:
    1.24 +            gaps += 1
    1.25 +    print "%s: %s gaps, %s non-gaps" % (row.sequence.name, gaps, monomers)
    1.26 +
    1.27 +# Print number of gaps in each column
    1.28 +gaps = []
    1.29 +for column in alignment.columns:
    1.30 +    column_gaps = 0
    1.31 +    for sequence in alignment.sequences:
    1.32 +        if sequence not in column:
    1.33 +            column_gaps += 1
    1.34 +    gaps.append(column_gaps)
    1.35 +print " ".join(map(str, gaps))
    1.36 +
    1.37 +# Write alignment to file
    1.38 +alingment.to_fasta(open("new_file.fasta", "w"))
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/test/usecase2.py	Fri Dec 17 00:45:20 2010 +0300
     2.3 @@ -0,0 +1,72 @@
     2.4 +# Fragments are in pair_repeat.fasta
     2.5 +from allpy import dna
     2.6 +width = 15
     2.7 +treshold = 14
     2.8 +
     2.9 +def my_column_mark(column, threshold):
    2.10 +    """Helper to mark column (given as dict) by identity."""
    2.11 +    count = {}
    2.12 +    for sequence, monomer in column:
    2.13 +        code = monomer.code1
    2.14 +        count[code] = count.get(code, 0) + 1
    2.15 +    for code in count:
    2.16 +        if count[code] > threshold:
    2.17 +            return "+"
    2.18 +    return "-"
    2.19 +
    2.20 +def my_pair_mark(column_as_list):
    2.21 +    """Helper to mark column of 2 sequences (given as list) by identity."""
    2.22 +    if column[0] is None or column[1] is None:
    2.23 +        return "-"
    2.24 +    if column[0].code1 == column[0].code1:   
    2.25 +        return "+"
    2.26 +    return "-"
    2.27 +
    2.28 +def find_runs(markup):
    2.29 +    """Fund long positive runs.
    2.30 +    
    2.31 +    This obscure and probably broken function has nothing to do with allpy,
    2.32 +    so it's presence in the example is unnecessary.
    2.33 +    """
    2.34 +    position = 0
    2.35 +    count = 0
    2.36 +    plus_positions=[]
    2.37 +    for i in range(len(column_marks)):
    2.38 +        position += 1
    2.39 +        if position < width :
    2.40 +            if column_marks[i]=="+":
    2.41 +                count += 1
    2.42 +            column_window_marks += "-"  
    2.43 +            continue 
    2.44 +        if position > width:
    2.45 +            if column_marks[i-width] == "+":
    2.46 +                count -=1 
    2.47 +        if count >= treshold:
    2.48 +            plus_positions.append(position)
    2.49 +    if len(positions)==0:
    2.50 +        raise Exception("No blocks in alignment")
    2.51 +    blocks=[]
    2.52 +    start = positions[0]-width + 1
    2.53 +    stop = positions[0]
    2.54 +
    2.55 +    for p in positions[1:]:
    2.56 +        if p == stop +1:
    2.57 +            stop = p
    2.58 +            cuntinue
    2.59 +        blocks.append((start,stop))
    2.60 +        start = p - width + 1
    2.61 +        stop = p
    2.62 +
    2.63 +def main():
    2.64 +    alignment = dna.Alignment.from_fasta(open("pair_repeat.fasta"))
    2.65 +    if alignment.height != 2:
    2.66 +        raise Exception("Input must have exactly 2 sequences!")
    2.67 +    alignment.realign("needle", gap_open = 0)
    2.68 +    markup = alignment.map_columns(my_pair_mark)
    2.69 +    print find_runs(markup)
    2.70 +
    2.71 +try:
    2.72 +    main()
    2.73 +except Exception, e:
    2.74 +    print "An error has occured:", e
    2.75 +
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/test/usecase3.py	Fri Dec 17 00:45:20 2010 +0300
     3.3 @@ -0,0 +1,36 @@
     3.4 +from allpy import protein
     3.5 +alignment = protein.Alignment.from_fasta(open("aln.fasta"))
     3.6 +#conservative = [(10,20), (40,50)]
     3.7 +conservative = [(0,6),(18,37)]
     3.8 +
     3.9 +def ranges_to_markup(ranges):
    3.10 +    """Convert list of ranges to line of markup.
    3.11 +    
    3.12 +    This has nothing to do with allpy.
    3.13 +    """
    3.14 +    markup = ["-"] * len(alignment.columns)
    3.15 +    for begin, end in ranges:
    3.16 +        for i in range(begin, end+1):
    3.17 +            markup[i] = "+"
    3.18 +    return "".join(markup)
    3.19 +
    3.20 +def markup_to_blocks(markup):
    3.21 +    """Convert markup line to a bunch of blocks, one for each sequential run."""
    3.22 +    current = None
    3.23 +    blocks = {}
    3.24 +    for mark, column in zip(markup, alignment.columns):
    3.25 +        if mark != current:
    3.26 +            block = protein.Block.from_alignment(alignment, columns=[])
    3.27 +            blocks[mark] = blocks.get(mark, []) + [block]
    3.28 +        current = mark
    3.29 +        blocks[mark][-1].columns.append(column)
    3.30 +    return blocks
    3.31 +
    3.32 +def main():
    3.33 +    markup = ranges_to_markup(conservative)
    3.34 +    blocks = markup_to_blocks(markup)
    3.35 +    for block in blocks["-"]:
    3.36 +        block.flush_left()
    3.37 +    alignment.to_fasta(open("output.fasta", "w"))
    3.38 +
    3.39 +main()