allpy
changeset 635:82a5d611c32d
Implemented processors.FixOrdering and processors.BypassEmpty as processors instead of the hacky fix_ordering and fix_missing helper functions
author | Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru> |
---|---|
date | Tue, 24 May 2011 22:12:57 +0400 |
parents | 106a142660b9 |
children | bfe1f66a924c |
files | allpy/processors.py |
diffstat | 1 files changed, 73 insertions(+), 36 deletions(-) [+] |
line diff
1.1 --- a/allpy/processors.py Fri May 20 13:35:09 2011 +0400 1.2 +++ b/allpy/processors.py Tue May 24 22:12:57 2011 +0400 1.3 @@ -4,37 +4,6 @@ 1.4 import os 1.5 from tempfile import NamedTemporaryFile 1.6 1.7 -def sequence_id(sequence): 1.8 - return sequence.name + " " + sequence.description 1.9 - 1.10 -def fix_missing(old_block, new_block): 1.11 - """Add to new block sequences it is missing from old block. 1.12 - 1.13 - New added sequences are empty. 1.14 - """ 1.15 - new_sequence_names = map(sequence_id, new_block.sequences) 1.16 - for sequence in old_block.sequences: 1.17 - if sequence_id(sequence) not in new_sequence_names: 1.18 - clone = sequence.types.Sequence() 1.19 - # XXX: this copies descriptions but not contents 1.20 - # XXX: and that is exactly what we want 1.21 - vars(clone).update(vars(sequence)) 1.22 - new_block.sequences.append(clone) 1.23 - return new_block 1.24 - 1.25 -def fix_ordering(old_block, new_block): 1.26 - """Reorder sequences in new_block the same way as in old_block. 1.27 - 1.28 - Return new_block. 1.29 - """ 1.30 - new_sequences = dict([ (sequence_id(sequence), sequence) 1.31 - for sequence in new_block.sequences 1.32 - ]) 1.33 - new_block.sequences = [] 1.34 - for sequence in old_block.sequences: 1.35 - new_block.sequences.append(new_sequences[sequence_id(sequence)]) 1.36 - return new_block 1.37 - 1.38 class ExternalCommand(object): 1.39 """Use external command to process block. 
1.40 1.41 @@ -57,7 +26,7 @@ 1.42 os.unlink(outfile.name) 1.43 return new_alignment 1.44 1.45 -class Muscle(ExternalCommand): 1.46 +class _Muscle(ExternalCommand): 1.47 """Realign block with muscle.""" 1.48 1.49 def __init__(self, remove_gaps=False): 1.50 @@ -75,10 +44,12 @@ 1.51 _block.sequences = list(block.sequences) 1.52 block = _block 1.53 block.realign(Left()) 1.54 - new_block = ExternalCommand.__call__(self, block) 1.55 - return fix_ordering(block, fix_missing(block, new_block)) 1.56 + return ExternalCommand.__call__(self, block) 1.57 1.58 -class Needle(ExternalCommand): 1.59 +def Muscle(remove_gaps=False): 1.60 + return FixOrdering(BypassEmpty(_Muscle())) 1.61 + 1.62 +class _Needle(ExternalCommand): 1.63 """Realign block with needle.""" 1.64 1.65 def __init__(self, **kwargs): 1.66 @@ -102,10 +73,13 @@ 1.67 'seq1': block.sequences[0].name, 1.68 'seq2': block.sequences[1].name, 1.69 } 1.70 - return fix_ordering(block, ExternalCommand.__call__(self, block)) 1.71 + return ExternalCommand.__call__(self, block) 1.72 finally: 1.73 self.command = orig_command 1.74 1.75 +def Needle(**kwargs): 1.76 + return FixOrdering(_Needle(**kwargs)) 1.77 + 1.78 class Flush(object): 1.79 """Flush all non-gap monomers to one side.""" 1.80 def __call__(self, alignment): 1.81 @@ -133,4 +107,67 @@ 1.82 def _padding_sizes(self, n_gaps): 1.83 return n_gaps // 2, (n_gaps + 1) // 2 1.84 1.85 +class BypassEmpty(object): 1.86 + """Metaprocessor: only apply processor to non-empty rows in alignment.""" 1.87 + 1.88 + def __init__(self, processor): 1.89 + self.processor = processor 1.90 + 1.91 + def __call__(self, block): 1.92 + sequences = [ 1.93 + row.sequence 1.94 + for row in block.rows_as_lists() 1.95 + if len(filter(None, row)) != 0 1.96 + ] 1.97 + in_block = block.types.Block.from_alignment(block, sequences=sequences) 1.98 + out_block = self.processor(in_block) 1.99 + sequence_set = set(sequences) 1.100 + for sequence in block.sequences: 1.101 + if sequence not in sequence_set: 1.102 + 
out_block.sequences.append(sequence) 1.103 + return out_block 1.104 + 1.105 +class FixOrdering(object): 1.106 + """Metaprocessor: fix sequence ordering output by processor.""" 1.107 + 1.108 + def __init__(self, processor, id=None): 1.109 + """Create the processor. 1.110 + 1.111 + Arguments: 1.112 + 1.113 + - `processor` -- a processor to fix ordering of 1.114 + - `id` -- a function mapping sequence to it's identity, 1.115 + identity is some hashable value that does not change after applying 1.116 + the processor; e.g. it is sequence name or a pair of name and 1.117 + description; defaults to FixOrdering.id_by_name_description 1.118 + """ 1.119 + if id is None: 1.120 + id = self.id_by_name_description 1.121 + self.processor = processor 1.122 + self.id = id 1.123 + 1.124 + def __call__(self, block): 1.125 + out_block = self.processor(block) 1.126 + sequences = dict(( 1.127 + (self.id(sequence), sequence) 1.128 + for sequence in out_block.sequences 1.129 + )) 1.130 + out_block.sequences = [ 1.131 + sequences[self.id(old_sequence)] 1.132 + for old_sequence in block.sequences 1.133 + ] 1.134 + return out_block 1.135 + 1.136 + @staticmethod 1.137 + def id_by_name_description(sequence): 1.138 + return (sequence.name, sequence.description) 1.139 + 1.140 + @staticmethod 1.141 + def id_by_name(sequence): 1.142 + return sequence.name 1.143 + 1.144 + @staticmethod 1.145 + def id_by_description(sequence): 1.146 + return sequence.description 1.147 + 1.148 # vim: set et ts=4 sts=4 sw=4: