allpy
changeset 704:80043822a41e
Added fileio.File.read_alignment and fileio.File.write_alignment [closes #57]
This is now the recommended interface.
In the following commits, this interface will be used to write markup together
with the alignment.
Previously, fileio received the alignment already torn apart into sequence
representations (strings with gaps), sequence names, and descriptions.
Now fileio tears the alignment apart by itself.
author | Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru> |
---|---|
date | Thu, 07 Jul 2011 19:21:12 +0400 |
parents | eb2a902b1e0a |
children | 8b03bd2f919a |
files | allpy/base.py allpy/fileio.py |
diffstat | 2 files changed, 29 insertions(+), 20 deletions(-) [+] |
line diff
1.1 --- a/allpy/base.py Thu Jul 07 12:53:53 2011 +0400 1.2 +++ b/allpy/base.py Thu Jul 07 19:21:12 2011 +0400 1.3 @@ -221,17 +221,12 @@ 1.4 If sequences in file have gaps (detected as characters belonging to 1.5 `gaps` set), treat them accordingly. 1.6 """ 1.7 - sequences = [] 1.8 - io = fileio.File(file, format) 1.9 - for name, description, body in io.read_strings(): 1.10 - self.append_row_from_string(body, name, description, file.name, gaps) 1.11 + fileio.File(file, format, gaps=gaps).read_alignment(self) 1.12 return self 1.13 1.14 def to_file(self, file, format='fasta', gap='-'): 1.15 """Write alignment in FASTA file as sequences with gaps.""" 1.16 - strings = [(s, s.sequence.name, s.sequence.description) 1.17 - for s in self.rows_as_strings()] 1.18 - fileio.File(file, format).write_strings(strings) 1.19 + fileio.File(file, format, gaps=gap).write_alignment(self) 1.20 1.21 # Data access methods for alignment 1.22 # =================================
2.1 --- a/allpy/fileio.py Thu Jul 07 12:53:53 2011 +0400 2.2 +++ b/allpy/fileio.py Thu Jul 07 19:21:12 2011 +0400 2.3 @@ -5,19 +5,37 @@ 2.4 2.5 class File(object): 2.6 """Automatical file IO.""" 2.7 - def __new__(cls, file, format="fasta"): 2.8 + def __new__(cls, file, format="fasta", **kw): 2.9 if format == "fasta": 2.10 - return FastaFile(file) 2.11 + return FastaFile(file, **kw) 2.12 else: 2.13 - return EmbossFile(file, format) 2.14 + return EmbossFile(file, format, **kw) 2.15 2.16 -class FastaFile(object): 2.17 +class AlignmentFile(object): 2.18 + """Some helpers.""" 2.19 + 2.20 + def __init__(self, file, format='fasta', gaps='-', wrap_column=70): 2.21 + self.file = file 2.22 + self.format = format 2.23 + self.gaps = gaps 2.24 + self.wrap_column = wrap_column 2.25 + 2.26 + def write_alignment(self, alignment): 2.27 + """Append alignment to the file.""" 2.28 + self.write_strings( 2.29 + (row, row.sequence.name, row.sequence.description) 2.30 + for row in alignment.rows_as_strings(self.gaps) 2.31 + ) 2.32 + 2.33 + def read_alignment(self, alignment): 2.34 + """Read alignment from the file.""" 2.35 + append_row = alignment.append_row_from_string 2.36 + for name, description, body in self.read_strings(): 2.37 + append_row(body, name, description, file.name, self.gaps) 2.38 + 2.39 +class FastaFile(AlignmentFile): 2.40 """Fasta parser & writer.""" 2.41 2.42 - def __init__(self, file, wrap_column=70): 2.43 - self.file = file 2.44 - self.wrap_column = wrap_column 2.45 - 2.46 def write_string(self, string, name, description=''): 2.47 """Append one sequence to file.""" 2.48 if description: 2.49 @@ -49,13 +67,9 @@ 2.50 body = util.remove_each(body, " \n\r\t\v") 2.51 yield (name, description, body) 2.52 2.53 -class EmbossFile(object): 2.54 +class EmbossFile(AlignmentFile): 2.55 """Parser & writer for file formats supported by EMBOSS.""" 2.56 2.57 - def __init__(self, file, format): 2.58 - self.file = file 2.59 - self.format = format 2.60 - 2.61 def write_strings(self, 
sequences): 2.62 """Write sequences to file.""" 2.63 pipe = Popen(['seqret', 'stdin', '%s::stdout' % self.format],