allpy
changeset 537:364232e42888 1.2.0
Reimplemented flush(direction) as processor
author | Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru> |
---|---|
date | Mon, 28 Feb 2011 21:17:41 +0300 |
parents | 1a8ddc6e2eee |
children | d73ad479a7a5 |
files | allpy/base.py allpy/processors.py test/test_base.py |
diffstat | 3 files changed, 60 insertions(+), 27 deletions(-) [+] |
line diff
```
 1.1 --- a/allpy/base.py Mon Feb 28 19:24:28 2011 +0300
 1.2 +++ b/allpy/base.py Mon Feb 28 21:17:41 2011 +0300
 1.3 @@ -180,6 +180,15 @@
 1.4          self.sequences.append(sequence)
 1.5          return self
 1.6
 1.7 +    def append_row_with_gaps(self, row, sequence):
 1.8 +        """Add row from row_as_list representation and sequence. Return self."""
 1.9 +        self.sequences.append(sequence)
1.10 +        self._pad_to_width(len(row))
1.11 +        for column, monomer in zip(self.columns, row):
1.12 +            if monomer:
1.13 +                column[sequence] = monomer
1.14 +        return self
1.15 +
1.16      def _pad_to_width(self, n):
1.17          """Pad alignment with empty columns on the right to width n."""
1.18          for i in range(len(self.columns), n):
1.19 @@ -285,35 +294,20 @@
1.20      # Alignment / Block editing methods
1.21      # =================================
1.22
1.23 -    def _flush_row(self, row, whence='left'):
1.24 -        """Helper for `flush`: flush to one side all monomers in one row."""
1.25 -        row = filter(None, row)
1.26 -        padding = [None] * len(self.columns)
1.27 -        if whence == 'left':
1.28 -            return row + padding
1.29 -        if whence == 'right':
1.30 -            return padding + row
1.31 -        if whence == 'center':
1.32 -            pad_len = (len(self.columns) - len(row)) // 2
1.33 -            # vvv fix padding for case when length is odd: better have more
1.34 -            pad_len += len(self.columns) - 2 * pad_len
1.35 -            padding = [None] * pad_len
1.36 -            return padding + row + padding
1.37 -        assert True, "whence must be either 'left' or 'right' or 'center'"
1.38 -
1.39      def flush(self, whence='left'):
1.40          """Remove all gaps from alignment and flush results to one side.
1.41
1.42          `whence` must be one of 'left', 'right' or 'center'
1.43          """
1.44 -        for row in self.rows_as_lists():
1.45 -            sequence = row.sequence
1.46 -            row = self._flush_row(row, whence)
1.47 -            for monomer, column in zip(row, self.columns):
1.48 -                if monomer:
1.49 -                    column[sequence] = monomer
1.50 -                elif sequence in column:
1.51 -                    del column[sequence]
1.52 +        if whence == 'left':
1.53 +            from processors import Left as Flush
1.54 +        elif whence == 'right':
1.55 +            from processors import Right as Flush
1.56 +        elif whence == 'center':
1.57 +            from processors import Center as Flush
1.58 +        else:
1.59 +            raise AssertionError, "Whence must be left, right or center"
1.60 +        self.realign(Flush())
1.61
1.62      def remove_gap_columns(self):
1.63          """Remove all empty columns."""
1.64 @@ -391,6 +385,17 @@
1.65              copy_contents = function.copy_contents
1.66          self._replace_contents(new, copy_descriptions, copy_contents)
1.67
1.68 +    def realign(self, function):
1.69 +        """Realign self.
1.70 +
1.71 +        I.e.: apply function to self to produce a new alignment, then update
1.72 +        self to have the same gap patterns as the new alignment.
1.73 +
1.74 +        This is the same as process(function, False, False)
1.75 +        """
1.76 +        new = function(self)
1.77 +        self._replace_column_contents(new)
1.78 +
1.79  class Column(dict):
1.80      """Column of alignment.
1.81
```
```
 2.1 --- a/allpy/processors.py Mon Feb 28 19:24:28 2011 +0300
 2.2 +++ b/allpy/processors.py Mon Feb 28 21:17:41 2011 +0300
 2.3 @@ -67,4 +67,31 @@
 2.4          new_block = ExternalCommand.__call__(self, block)
 2.5          return fix_ordering(block, fix_missing(block, new_block))
 2.6
 2.7 +class Flush(object):
 2.8 +    """Flush all non-gap monomers to one side."""
 2.9 +    def __call__(self, alignment):
2.10 +        result = alignment.types.Alignment()
2.11 +        for row in alignment.rows_as_lists():
2.12 +            sequence = row.sequence
2.13 +            no_gaps = filter(None, row)
2.14 +            llen, rlen = self._padding_sizes(len(row) - len(no_gaps))
2.15 +            row = [None] * llen + no_gaps + [None] * rlen
2.16 +            result.append_row_with_gaps(row, sequence)
2.17 +        return result
2.18 +
2.19 +class Left(Flush):
2.20 +    """Flush all non-gap monomers to the left."""
2.21 +    def _padding_sizes(self, n_gaps):
2.22 +        return 0, n_gaps
2.23 +
2.24 +class Right(Flush):
2.25 +    """Flush all non-gap monomers to the right."""
2.26 +    def _padding_sizes(self, n_gaps):
2.27 +        return n_gaps, 0
2.28 +
2.29 +class Center(Flush):
2.30 +    """Center all non-gap monomers in alignment."""
2.31 +    def _padding_sizes(self, n_gaps):
2.32 +        return n_gaps // 2, (n_gaps + 1) // 2
2.33 +
2.34  # vim: set et ts=4 sts=4 sw=4:
```
```
 3.1 --- a/test/test_base.py Mon Feb 28 19:24:28 2011 +0300
 3.2 +++ b/test/test_base.py Mon Feb 28 21:17:41 2011 +0300
 3.3 @@ -1,5 +1,6 @@
 3.4  import allpy.base as b
 3.5  import allpy.protein as p
 3.6 +from allpy import processors
 3.7  from StringIO import StringIO
 3.8
 3.9  def test_new_monomers():
3.10 @@ -42,21 +43,21 @@
3.11          "-------------RST",
3.12      )
3.13
3.14 -    a.flush("left")
3.15 +    a.process(processors.Left())
3.16      assert_alignment(a,
3.17          "ACDEF-----------",
3.18          "GHIKLMPQ--------",
3.19          "RST-------------",
3.20      )
3.21
3.22 -    a.flush("center")
3.23 +    a.process(processors.Center())
3.24      assert_alignment(a,
3.25          "-----ACDEF------",
3.26          "----GHIKLMPQ----",
3.27          "------RST-------",
3.28      )
3.29
3.30 -    a.flush("right")
3.31 +    a.process(processors.Right())
3.32      assert_alignment(a,
3.33          "-----------ACDEF",
3.34          "--------GHIKLMPQ",
```