allpy
changeset 418:a4d7438c142f
add fileio module, remove fasta module, add msf support (see #31)
author | boris (netbook) <bnagaev@gmail.com> |
---|---|
date | Fri, 11 Feb 2011 15:04:17 +0300 |
parents | 8d678611cd1c |
children | e4c4151b9dc3 |
files | allpy/base.py allpy/fasta.py allpy/fileio.py |
diffstat | 3 files changed, 115 insertions(+), 49 deletions(-) [+] |
line diff
1.1 --- a/allpy/base.py Fri Feb 11 14:13:42 2011 +0300 1.2 +++ b/allpy/base.py Fri Feb 11 15:04:17 2011 +0300 1.3 @@ -2,7 +2,7 @@ 1.4 import re 1.5 1.6 import util 1.7 -import fasta 1.8 +import fileio 1.9 1.10 # import this very module as means of having all related classes in one place 1.11 import base 1.12 @@ -196,22 +196,34 @@ 1.13 If sequences in file have gaps (detected as characters belonging to 1.14 `gaps` set), treat them accordingly. 1.15 """ 1.16 - assert format == 'fasta', "We don't support other formats yet" 1.17 - for (name, description, body) in fasta.parse_file(file): 1.18 + sequences = [] 1.19 + if format == 'fasta': 1.20 + sequences = fileio.FastaIo(file).get_all_strings() 1.21 + elif format == 'msf': 1.22 + sequences = fileio.MsfIo(file).get_all_strings() 1.23 + else: 1.24 + raise Exception("We don't support other formats yet") 1.25 + for (name, description, body) in sequences: 1.26 self.append_row_from_string(body, name, description, file.name, gaps) 1.27 return self 1.28 1.29 - def to_file(self, file, format='fasta'): 1.30 + def to_file(self, file, format='fasta', gap='-'): 1.31 """Write alignment in FASTA file as sequences with gaps.""" 1.32 assert format == "fasta", "We don't support other formats yet" 1.33 def char(monomer): 1.34 if monomer: 1.35 return monomer.code1 1.36 - return "-" 1.37 + return gap 1.38 + if format == 'fasta': 1.39 + io = fileio.FastaIo(file) 1.40 + elif format == 'msf': 1.41 + io = fileio.MsfIo(file) 1.42 + else: 1.43 + raise Exception("We don't support other formats yet") 1.44 for row in self.rows_as_lists(): 1.45 seq = row.sequence 1.46 line = "".join(map(char, row)) 1.47 - fasta.save_file(file, line, seq.name, seq.description) 1.48 + io.save_string(line, seq.name, seq.description) 1.49 1.50 # Data access methods for alignment 1.51 # =================================
2.1 --- a/allpy/fasta.py Fri Feb 11 14:13:42 2011 +0300 2.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 2.3 @@ -1,43 +0,0 @@ 2.4 -import util 2.5 - 2.6 -def parse_file(file): 2.7 - """Parse fasta file, remove spaces and newlines from sequence bodies. 2.8 - 2.9 - Return a list of tuples (name, description, sequence_body). 2.10 - """ 2.11 - sequences = [] 2.12 - for part in file.read().split("\n>"): 2.13 - header, _, body = part.partition("\n") 2.14 - header = header.lstrip(">").strip() 2.15 - name, _, description = header.partition(" ") 2.16 - name = name.strip() 2.17 - description = description.strip() 2.18 - body = util.remove_each(body, " \n\r\t\v") 2.19 - sequences.append((name, description, body)) 2.20 - return sequences 2.21 - 2.22 -def save_file(out_file, string, name, description='', long_line=70): 2.23 - """ Saves given string to out_file in fasta_format 2.24 - 2.25 - Splits long lines to substrings of length=long_line 2.26 - To prevent this, set long_line=None 2.27 - """ 2.28 - if description: 2.29 - name += " " + description 2.30 - out_file.write(">%s\n" % name) 2.31 - if long_line: 2.32 - for i in range(0, len(string) // long_line + 1): 2.33 - out_file.write("%s\n" % string[i*long_line : i*long_line + long_line]) 2.34 - else: 2.35 - out_file.write("%s\n" % string) 2.36 - 2.37 -def determine_long_line(in_file): 2.38 - """ Returns maximum sequence line length in fasta file """ 2.39 - sequences = in_file.read().split('>') 2.40 - for sequence in sequences[1:]: 2.41 - lines = sequence.split('\n')[1:] 2.42 - if len(lines) >= 2: 2.43 - return len(lines[0].strip()) 2.44 - return 70 2.45 - 2.46 -# vim: set ts=4 sts=4 sw=4 et:
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/allpy/fileio.py Fri Feb 11 15:04:17 2011 +0300 3.3 @@ -0,0 +1,97 @@ 3.4 +import os 3.5 +from tempfile import NamedTemporaryFile 3.6 + 3.7 +import util 3.8 + 3.9 +class BaseIo(object): 3.10 + """ Base class providing alignment/sequence import and export 3.11 + 3.12 + Data: 3.13 + * file - file object 3.14 + """ 3.15 + 3.16 + def __init__(self, file): 3.17 + self.file = file 3.18 + 3.19 + def save_string(self, string, name, description=''): 3.20 + """ Saves given string to file 3.21 + 3.22 + Splits long lines to substrings of length=long_line 3.23 + To prevent this, set long_line=None 3.24 + """ 3.25 + pass 3.26 + 3.27 + def get_all_strings(self): 3.28 + """Parse fasta file, remove spaces and newlines from sequence bodies. 3.29 + 3.30 + Return a list of tuples (name, description, sequence_body). 3.31 + """ 3.32 + pass 3.33 + 3.34 + def get_string(self, name): 3.35 + """ return tuple (name, description, string) for sequence with name name """ 3.36 + for name_test, description, body in self.get_all_strings(): 3.37 + if name_test == name: 3.38 + return (name_test, description, body) 3.39 + 3.40 +class FastaIo(BaseIo): 3.41 + """ Fasta import and export 3.42 + 3.43 + Additional data: 3.44 + * long_line - max length of file line while export 3.45 + Splits long lines to substrings of length=long_line 3.46 + To prevent this, set long_line=None 3.47 + """ 3.48 + 3.49 + def __init__(self, file, long_line=70): 3.50 + BaseIo.__init__(self, file) 3.51 + self.long_line = long_line 3.52 + 3.53 + def save_string(self, string, name, description=''): 3.54 + if description: 3.55 + name += " " + description 3.56 + self.file.write(">%s\n" % name) 3.57 + if self.long_line: 3.58 + for i in range(0, len(string) // self.long_line + 1): 3.59 + start = i*self.long_line 3.60 + end = i*self.long_line + self.long_line 3.61 + self.file.write("%s\n" % string[start:end]) 3.62 + else: 3.63 + self.file.write("%s\n" % string) 3.64 + 3.65 + def get_all_strings(self): 3.66 + for part in self.file.read().split("\n>"): 3.67 + header, _, body = part.partition("\n") 3.68 + header = header.lstrip(">").strip() 3.69 + name, _, description = header.partition(" ") 3.70 + name = name.strip() 3.71 + description = description.strip() 3.72 + body = util.remove_each(body, " \n\r\t\v") 3.73 + yield (name, description, body) 3.74 + 3.75 + def get_string(self, name): 3.76 + for name_test, description, body in self.get_all_strings(): 3.77 + if name_test == name: 3.78 + return (name_test, description, body) 3.79 + 3.80 +class MsfIo(BaseIo): 3.81 + """ Msf import and export """ 3.82 + 3.83 + def __init__(self, file): 3.84 + BaseIo.__init__(self, file) 3.85 + self.tmp_fasta = NamedTemporaryFile(delete=False) 3.86 + self.tmp_fasta.close() 3.87 + os.system("seqret %(msf)s %(fasta)s" % \ 3.88 + {'msf': self.file, 'fasta': self.tmp_fasta.name}) 3.89 + 3.90 + def save_string(self, string, name, description=''): 3.91 + self.tmp_fasta = open(tmp_fasta.name, 'a') 3.92 + fasta = FastaIo(self.tmp_fasta) 3.93 + fasta.save_string(string, name, description) 3.94 + self.tmp_fasta.close() 3.95 + 3.96 + def get_all_strings(self): 3.97 + self.tmp_fasta = open(tmp_fasta.name) 3.98 + fasta = FastaIo(self.tmp_fasta) 3.99 + return fasta.get_all_strings() 3.100 +