allpy
changeset 313:316122b6b70e
Changed interfaces of allpy.fasta, updated allpy.base accordingly
* Renamed allpy.fasta.parse_fasta -> parse_file, save_fasta -> save_file
* parse_file returns a list of 3-tuples instead of dict
author | Daniil Alexeyevsky <me.dendik@gmail.com> |
---|---|
date | Thu, 16 Dec 2010 23:42:14 +0300 |
parents | 5417b9d6b63e |
children | 3347b5f31477 |
files | allpy/base.py allpy/fasta.py |
diffstat | 2 files changed, 11 insertions(+), 12 deletions(-) [+] |
line diff
```diff
--- a/allpy/base.py Thu Dec 16 22:30:32 2010 +0300
+++ b/allpy/base.py Thu Dec 16 23:42:14 2010 +0300
@@ -179,10 +179,9 @@
 
         File must contain exactly one sequence.
         """
-        sequences = fasta.parse_file(file)
-        assert len(sequences) == 1
-        name, description = sequences.keys()[0]
-        return cls(sequences[header], name, description, file.name)
+        sequence, = fasta.parse_file(file)
+        name, description, body = sequence
+        return cls(body, name, description, file.name)
 
 class Alignment(object):
     """Alignment. It is a list of Columns."""
@@ -214,7 +213,7 @@
     def append_gapped_line(self, line, name='', description='', source='',
                            gaps=default_gaps):
         """Add row from a line of one-letter codes and gaps."""
-        Sequence = cls.sequence_type
+        Sequence = self.sequence_type
         not_gap = lambda (i, char): char not in gaps
         without_gaps = util.remove_each(line, gaps)
         sequence = Sequence(without_gaps, name, description, source)
@@ -226,7 +225,7 @@
         # - it's position in alignment (the first attached number, j)
         # - it's position in sequence (the second attached number, i)
         for i, (j, char) in enumerate(filter(not_gap, enumerate(line))):
-            self.column_at(j)[seq] = sequence[i]
+            self.column_at(j)[sequence] = sequence[i]
         self.sequences.append(sequence)
 
     def column_at(self, n):
@@ -245,7 +244,7 @@
     def from_fasta(cls, file, gaps=default_gaps):
         """Create new alignment from FASTA file."""
         self = cls()
-        for ((name, description), body) in fasta.parse_file(file):
+        for (name, description, body) in fasta.parse_file(file):
             self.append_gapped_line(body, name, description, gaps)
         return self
 
```
```diff
--- a/allpy/fasta.py Thu Dec 16 22:30:32 2010 +0300
+++ b/allpy/fasta.py Thu Dec 16 23:42:14 2010 +0300
@@ -1,9 +1,9 @@
-def parse_fasta(file):
+def parse_file(file):
     """Parse fasta file, remove spaces and newlines from sequence bodies.
 
-    Return a dict of { (name, description) : sequence_body }.
+    Return a list of tuples (name, description, sequence_body).
     """
-    sequences = {}
+    sequences = []
     for part in file.read().split(">"):
         header, _, body = part.partition("\n")
         header = header.lstrip(">").strip()
@@ -11,10 +11,10 @@
         name = name.strip()
         description = description.strip()
         body = body.replace(" ", "").replace("\n", "")
-        sequences[name, description] = body
+        sequences.append((name, description, body))
     return sequences
 
-def save_fasta(out_file, string, name, description='', long_line=70):
+def save_file(out_file, string, name, description='', long_line=70):
     """ Saves given string to out_file in fasta_format
 
     Splits long lines to substrings of length=long_line
```