allpy
changeset 371:9847bd92b889
Implemented (uglyishly) Alignment.process. (see #25)
author | Daniil Alexeyevsky <me.dendik@gmail.com> |
---|---|
date | Wed, 26 Jan 2011 22:25:13 +0300 |
parents | 4f7f61a3b5e6 |
children | 670ae384aa37 |
files | allpy/base.py |
diffstat | 1 files changed, 58 insertions(+), 0 deletions(-) [+] |
line diff
# --- a/allpy/base.py Wed Jan 26 21:34:36 2011 +0300
# +++ b/allpy/base.py Wed Jan 26 22:25:13 2011 +0300
# @@ -311,6 +311,64 @@
#
# Per the commit message these are methods of Alignment.  The class header
# lies outside this hunk, so the methods are shown here without the
# class-level indentation.
#
# Unchanged context before the added methods (tail of the preceding method):
#     if column == {}:
#         self.columns[n:n+1] = []


def _wipe(self):
    """Make all positions gaps (but keep sequences intact).

    Every column in self.columns is emptied in place; self.sequences is
    not touched.
    """
    for column in self.columns:
        # Deleting keys one by one while iterating over the same dict raises
        # RuntimeError; clear() empties the column in a single safe step.
        column.clear()


def _append_row_from_list(self, new):
    """Append one row to the alignment.

    `new` is a row as produced by rows_as_lists(): an iterable of monomers
    (falsy entries are treated as gaps) that carries its sequence in
    `new.sequence`.  The sequence is appended to self.sequences and every
    non-gap monomer is stored in the column with the same index.
    """
    sequence = new.sequence
    self.sequences.append(sequence)
    for i, monomer in enumerate(new):
        if monomer:
            self._column_at(i)[sequence] = monomer


def _replace_sequence_contents(self, new):
    """Replace contents of sequences with those of new alignment.

    Sequences are paired by position.  For each pair the name, description
    and source are copied, the class of every overlapping monomer is
    replaced, and monomers beyond the current length are appended.
    Sequences that exist only in `new` are appended as whole rows.
    """
    # XXX: we manually copy sequence contents here
    # XXX: we do not copy all sequence contents, only overlapping parts
    # XXX: if sequence shortens, we leave some monomers unmodified (!!!)
    for sequence, new_sequence in zip(self.sequences, new.sequences):
        sequence.name = new_sequence.name
        sequence.description = new_sequence.description
        sequence.source = new_sequence.source
        for monomer, new_monomer in zip(sequence, new_sequence):
            monomer.__class__ = new_monomer.__class__
        for i in range(len(sequence), len(new_sequence)):
            sequence.append(new_sequence[i])

    # Rows present only in `new`.  The count is taken before appending,
    # because _append_row_from_list() grows self.sequences.
    old_count = len(self.sequences)
    if len(new.sequences) > old_count:
        # Build the row lists once instead of once per appended row.
        new_rows = new.rows_as_lists()
        for i in range(old_count, len(new.sequences)):
            self._append_row_from_list(new_rows[i])


def _replace_column_contents(self, new):
    """Replace column contents with those of new alignment.

    This requires that self.sequences and new.sequences have same length
    and same contents. (If sequences have same length but different
    contents, this effectively copies gap patterns. If sequences have
    different length, the results are meaningless and dumb).
    """
    # Read the rows before wiping.  NOTE(review): this presumably matters
    # when `new` shares column objects with self (e.g. the processing
    # function returned the very same block) -- verify against callers.
    new_rows = new.rows_as_lists()
    self._wipe()
    for sequence, new_row in zip(self.sequences, new_rows):
        assert len(sequence) == len(new_row.sequence)
        # Column indices where the new row holds a monomer.  The test must
        # be on the monomer, not on the index that enumerate() yields.
        positions = [
            i for i, new_monomer in enumerate(new_row)
            if new_monomer is not None
        ]
        # The k-th monomer of our sequence goes to the k-th non-gap position.
        for monomer, i in zip(sequence, positions):
            self._column_at(i)[sequence] = monomer


def _replace_contents(self, new):
    """Replace alignment contents with those of other alignment.

    Sequence contents are updated first, then the column layout.
    """
    self._replace_sequence_contents(new)
    self._replace_column_contents(new)


def process(self, function):
    """Apply function to the alignment (or block); inject results back.

    function(block) must return block with same line order.
    """
    new = function(self)
    self._replace_contents(new)


# Unchanged context after the added methods (start of the next class):
# class Column(dict):
#     """Column of alignment.