allpy
diff allpy/base.py @ 306:88631907f23d
Allow user to specify gap characters when parsing alignment
author | Daniil Alexeyevsky <me.dendik@gmail.com> |
---|---|
date | Thu, 16 Dec 2010 19:24:22 +0300 |
parents | 578bc81d5e62 |
children | 237deca17963 |
line diff
1.1 --- a/allpy/base.py Thu Dec 16 19:17:08 2010 +0300 1.2 +++ b/allpy/base.py Thu Dec 16 19:24:22 2010 +0300 1.3 @@ -10,6 +10,9 @@ 1.4 from Bio.PDB.DSSP import make_dssp_dict 1.5 import data.codes 1.6 1.7 +default_gaps = set((".", "-", "~")) 1.8 +"""Set of characters to recognize as gaps when parsing alignment.""" 1.9 + 1.10 class MonomerType(object): 1.11 """Class of monomer types. 1.12 1.13 @@ -211,12 +214,13 @@ 1.14 for i, monomer in enumerate(sequence): 1.15 self.column_at(i)[sequence] = monomer 1.16 1.17 - def append_gapped_line(self, line, name='', description='', source=''): 1.18 + def append_gapped_line(self, line, name='', description='', source='', 1.19 + gaps=default_gaps): 1.20 """Add row from a line of one-letter codes and gaps.""" 1.21 Sequence = cls.sequence_type 1.22 - not_gap = lambda (i, char): char != "-" 1.23 - no_gaps = line.replace("-", "") 1.24 - sequence = Sequence(no_gaps, name, description, source) 1.25 + not_gap = lambda (i, char): char not in gaps 1.26 + without_gaps = util.remove_each(line, gaps) 1.27 + sequence = Sequence(without_gaps, name, description, source) 1.28 # The following line has some simple magic: 1.29 # 1. attach natural numbers to monomers 1.30 # 2. delete gaps 1.31 @@ -241,11 +245,11 @@ 1.32 # ==================== 1.33 1.34 @classmethod 1.35 - def from_fasta(cls, file): 1.36 + def from_fasta(cls, file, gaps=default_gaps): 1.37 """Create new alignment from FASTA file.""" 1.38 self = cls() 1.39 for ((name, description), body) in fasta.parse_file(file): 1.40 - self.append_gapped_line(body, name, description) 1.41 + self.append_gapped_line(body, name, description, gaps) 1.42 return self 1.43 1.44 def to_fasta(self, file):