allpy
changeset 337:cd43d59c0dba
Renamed flush_left to flush_left_vblock; added flush_left_vblocks, which flushes several blocks to the left in one run.
author | Andrei <aba@belozersky.msu.ru> |
---|---|
date | Wed, 12 Jan 2011 22:27:08 +0300 |
parents | 679494ad2f4e |
children | 951b7e159698 |
files | utils/flush_left_vblock.py utils/flush_left_vblocks.py |
diffstat | 2 files changed, 119 insertions(+), 0 deletions(-) [+] |
line diff
#!/usr/bin/python
# utils/flush_left_vblock.py
"""Flush all monomers in given range to the left, all gaps to the right.

All position indexes are counting from 1.
"""
import optparse
import os
import subprocess
import sys

from allpy import protein


def _msf_path(fasta_path):
    """Return the MSF file name derived from `fasta_path`.

    Everything after the first dot of the *file name* is replaced by
    ".msf". Only the file name is inspected, so dots in the directory
    part (e.g. "./out.fasta") no longer truncate the path.
    """
    directory, filename = os.path.split(fasta_path)
    # A name starting with a dot would give an empty stem; keep it whole.
    stem = filename.split(".")[0] or filename
    return os.path.join(directory, stem + ".msf")


def _convert_to_msf(fasta_path):
    """Convert `fasta_path` to MSF with seqret and delete the FASTA file.

    The FASTA file is removed only after seqret exits successfully, so a
    failed conversion never destroys the result. The command is run
    without a shell, so unusual characters in file names are safe.
    """
    command = ["seqret", fasta_path, "msf::" + _msf_path(fasta_path)]
    try:
        status = subprocess.call(command)
    except OSError as error:
        sys.exit("Cannot run seqret (%s); FASTA output kept in %s"
                 % (error, fasta_path))
    if status != 0:
        sys.exit("seqret failed with exit status %s; FASTA output kept in %s"
                 % (status, fasta_path))
    os.remove(fasta_path)


def main():
    """Flush the selected column range left and write the alignment.

    Reads the module-level `options` produced by the option parser.
    A missing (or zero) --begin defaults to the first column, a missing
    (or zero) --end to the last column of the alignment.
    """
    with open(options.in_file) as in_file:
        alignment = protein.Alignment.from_fasta(in_file)

    begin = options.begin or 1
    end = options.end or len(alignment.columns)

    # Positions are 1-based and inclusive; slices are 0-based, end-exclusive.
    columns = alignment.columns[begin - 1:end]
    block = protein.Block.from_alignment(alignment, columns=columns)
    block.flush_left()

    with open(options.out_file, "w") as out_file:
        alignment.to_fasta(out_file)

    if options.msf:
        _convert_to_msf(options.out_file)


if __name__ == "__main__":
    usage = "Usage: %s [options]\n\n%s" % (sys.argv[0], __doc__.strip())
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-i", "--in-file",
        help="Input alignment file (in FASTA format)")
    parser.add_option("-o", "--out-file",
        help="Output file")
    parser.add_option("-b", "--begin", type=int,
        help="Position in alignment to start from")
    parser.add_option("-e", "--end", type=int,
        help="Position in alignment to end with")
    parser.add_option("-m", "--msf", action='store_true',
        help="Output in MSF format (FASTA by default)")

    options, args = parser.parse_args()

    if args:
        parser.error("We take no positional arguments.")
    if not options.in_file or not options.out_file:
        parser.error("Both -i and -o parameters must be given.")

    main()

# vim: set et ts=4 sts=4 sw=4:
#!/usr/bin/python
# utils/flush_left_vblocks.py
"""Flush all monomers in given ranges to the left, all gaps to the right.

All position indexes are counting from 1.

File with ranges format is as follows
#from to
10 15
107 121
etc

Lines with "#" in 1st position are skipped

"""
import optparse
import os
import subprocess
import sys

from allpy import protein


def _msf_path(fasta_path):
    """Return the MSF file name derived from `fasta_path`.

    Everything after the first dot of the *file name* is replaced by
    ".msf". Only the file name is inspected, so dots in the directory
    part (e.g. "./out.fasta") no longer truncate the path.
    """
    directory, filename = os.path.split(fasta_path)
    # A name starting with a dot would give an empty stem; keep it whole.
    stem = filename.split(".")[0] or filename
    return os.path.join(directory, stem + ".msf")


def _convert_to_msf(fasta_path):
    """Convert `fasta_path` to MSF with seqret and delete the FASTA file.

    The FASTA file is removed only after seqret exits successfully, so a
    failed conversion never destroys the result. The command is run
    without a shell, so unusual characters in file names are safe.
    """
    command = ["seqret", fasta_path, "msf::" + _msf_path(fasta_path)]
    try:
        status = subprocess.call(command)
    except OSError as error:
        sys.exit("Cannot run seqret (%s); FASTA output kept in %s"
                 % (error, fasta_path))
    if status != 0:
        sys.exit("seqret failed with exit status %s; FASTA output kept in %s"
                 % (status, fasta_path))
    os.remove(fasta_path)


def main(ranges):
    """Flush every listed column range left and write the alignment.

    `ranges` is an iterable of (begin, end) pairs, 1-based and inclusive.
    Input/output settings come from the module-level `options`.
    """
    with open(options.in_file) as in_file:
        alignment = protein.Alignment.from_fasta(in_file)

    for begin, end in ranges:
        # Positions are 1-based and inclusive; slices are 0-based,
        # end-exclusive.
        columns = alignment.columns[begin - 1:end]
        block = protein.Block.from_alignment(alignment, columns=columns)
        block.flush_left()

    with open(options.out_file, "w") as out_file:
        alignment.to_fasta(out_file)

    if options.msf:
        _convert_to_msf(options.out_file)


def ranges():
    """Read the (begin, end) pairs from the file named by `options.ranges`.

    Blank lines and lines with "#" in the first position are skipped.
    A line that is not exactly two integers, or that describes an
    impossible range (begin < 1 or end < begin), produces a warning and
    is ignored. Returns a list of (begin, end) tuples in file order.
    """
    parsed = []
    with open(options.ranges) as ranges_file:
        for line_no, line in enumerate(ranges_file, 1):
            if line.strip() == "" or line.startswith("#"):
                continue
            try:
                begin, end = [int(field) for field in line.split()]
                # begin < 1 would make the slice count from the end of
                # the alignment, silently touching the wrong columns.
                if begin < 1 or end < begin:
                    raise ValueError("invalid range")
            except ValueError:
                print("Warning: wrong format in line %s, ignoring" % line_no)
                continue
            parsed.append((begin, end))
    return parsed


if __name__ == "__main__":
    usage = "Usage: %s [options]\n\n%s" % (sys.argv[0], __doc__.strip())
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-i", "--in-file",
        help="Input alignment file (in FASTA format)")
    parser.add_option("-o", "--out-file",
        help="Output file")
    parser.add_option("-r", "--ranges",
        help="Input file with ranges")
    parser.add_option("-m", "--msf", action='store_true',
        help="Output in MSF format (FASTA by default)")

    options, args = parser.parse_args()

    if args:
        parser.error("We take no positional arguments.")
    if not options.in_file or not options.out_file:
        parser.error("Both -i and -o parameters must be given.")
    if not options.ranges:
        parser.error("The -r parameter must be given.")

    main(ranges())

# vim: set et ts=4 sts=4 sw=4: