Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/cd43d59c0dba
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 00:09:17 2012
Кодировка:
allpy: cd43d59c0dba

allpy

changeset 337:cd43d59c0dba

flush_left renamed to flush_left_vblock. flush_left_vblocks created to flush multiple blocks to the left
author Andrei <aba@belozersky.msu.ru>
date Wed, 12 Jan 2011 22:27:08 +0300
parents 679494ad2f4e
children 951b7e159698
files utils/flush_left_vblock.py utils/flush_left_vblocks.py
diffstat 2 files changed, 119 insertions(+), 0 deletions(-) [+]
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/utils/flush_left_vblock.py	Wed Jan 12 22:27:08 2011 +0300
     1.3 @@ -0,0 +1,48 @@
     1.4 +#!/usr/bin/python
     1.5 +"""Flush all monomers in given range to the left, all gaps to the right.
     1.6 +
     1.7 +All position indexes are counting from 1.
     1.8 +"""
     1.9 +import optparse
    1.10 +import sys
    1.11 +import os
    1.12 +from allpy import protein
    1.13 +
    1.14 +def main():
    1.15 +    alignment = protein.Alignment.from_fasta(open(options.in_file))
    1.16 +    if not options.begin:
    1.17 +        options.begin = 1
    1.18 +    if not options.end:
    1.19 +        options.end = len(alignment.columns)
    1.20 +    columns = alignment.columns[options.begin-1:options.end]
    1.21 +    block = protein.Block.from_alignment(alignment, columns=columns)
    1.22 +    block.flush_left()
    1.23 +    alignment.to_fasta(open(options.out_file, "w"))
    1.24 +    if options.msf:
    1.25 +        os.system("seqret " + options.out_file + " msf::" + options.out_file.split(".")[0] + ".msf")
    1.26 +        os.system("rm " + options.out_file)
    1.27 +
    1.28 +if __name__ == "__main__":
    1.29 +    usage = "Usage: %s [options]\n\n%s" % (sys.argv[0], __doc__.strip())
    1.30 +    parser = optparse.OptionParser(usage=usage)
    1.31 +    parser.add_option("-i", "--in-file",
    1.32 +        help="Input alignment file (in FASTA format)")
    1.33 +    parser.add_option("-o", "--out-file",
    1.34 +        help="Output file")
    1.35 +    parser.add_option("-b", "--begin", type=int,
    1.36 +        help="Position in alignment to start from")
    1.37 +    parser.add_option("-e", "--end", type=int,
    1.38 +        help="Position in alignment to end with")
    1.39 +    parser.add_option("-m", "--msf", action='store_true',
    1.40 +        help="Output in MSF format (FASTA by default)")
    1.41 +
    1.42 +    options, args = parser.parse_args()
    1.43 +
    1.44 +    if args:
    1.45 +        parser.error("We take no positional arguments.")
    1.46 +    if not options.in_file or not options.out_file:
    1.47 +        parser.error("Both -i and -o parameters must be given.")
    1.48 +
    1.49 +    main()
    1.50 +
    1.51 +# vim: set et ts=4 sts=4 sw=4:
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/utils/flush_left_vblocks.py	Wed Jan 12 22:27:08 2011 +0300
     2.3 @@ -0,0 +1,71 @@
     2.4 +#!/usr/bin/python
     2.5 +"""Flush all monomers in given ranges to the left, all gaps to the right.
     2.6 +
     2.7 +All position indexes are counting from 1.
     2.8 +
     2.9 +File with ranges format is as follows
    2.10 +#from to
    2.11 +10 15
    2.12 +107 121
    2.13 +etc
    2.14 +
    2.15 +Lines with "#" in 1st position are skipped
    2.16 + 
    2.17 +"""
    2.18 +import optparse
    2.19 +import sys
    2.20 +import os
    2.21 +from allpy import protein
    2.22 +
    2.23 +def main(ranges):
    2.24 +    alignment = protein.Alignment.from_fasta(open(options.in_file))
    2.25 +    for begin, end in ranges:
    2.26 +        columns = alignment.columns[begin-1:end]
    2.27 +        block = protein.Block.from_alignment(alignment, columns=columns)
    2.28 +        block.flush_left()
    2.29 +
    2.30 +    alignment.to_fasta(open(options.out_file, "w"))
    2.31 +    if options.msf:
    2.32 +        os.system("seqret " + options.out_file + " msf::" + options.out_file.split(".")[0] + ".msf")
    2.33 +        os.system("rm " + options.out_file)
    2.34 +    
    2.35 +def ranges():
    2.36 +    ranges = []
    2.37 +    for line_no, line in enumerate(open(options.ranges), 1):        
    2.38 +        if line.strip() == "":
    2.39 +            continue
    2.40 +        if line[0] == "#":
    2.41 +            continue
    2.42 +        try:
    2.43 +            begin, end = line.strip().split()
    2.44 +            begin = int(begin)
    2.45 +            end = int(end)
    2.46 +        except Exception:
    2.47 +            print "Warning: wrong format in line %s, ignoring" % line_no
    2.48 +            continue
    2.49 +        ranges.append( (begin, end) )
    2.50 +    return ranges      
    2.51 +
    2.52 +
    2.53 +if __name__ == "__main__":
    2.54 +    usage = "Usage: %s [options]\n\n%s" % (sys.argv[0], __doc__.strip())
    2.55 +    parser = optparse.OptionParser(usage=usage)
    2.56 +    parser.add_option("-i", "--in-file",
    2.57 +        help="Input alignment file (in FASTA format)")
    2.58 +    parser.add_option("-o", "--out-file",
    2.59 +        help="Output file")
    2.60 +    parser.add_option("-r", "--ranges",
    2.61 +        help="Input file with ranges")
    2.62 +    parser.add_option("-m", "--msf", action='store_true',
    2.63 +        help="Output in MSF format (FASTA by default)")
    2.64 +
    2.65 +    options, args = parser.parse_args()
    2.66 +
    2.67 +    if args:
    2.68 +        parser.error("We take no positional arguments.")
    2.69 +    if not options.in_file or not options.out_file:
    2.70 +        parser.error("Both -i and -o parameters must be given.")
    2.71 +
    2.72 +    main(ranges())
    2.73 +
    2.74 +# vim: set et ts=4 sts=4 sw=4: