Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/82a5d611c32d
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 00:58:22 2012
Кодировка:
allpy: 82a5d611c32d

allpy

changeset 635:82a5d611c32d

Implemented processors.FixOrdering and processors.BypassEmpty as processors instead of hacky fix_ordering and fix_missing helper functions
author Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru>
date Tue, 24 May 2011 22:12:57 +0400
parents 106a142660b9
children bfe1f66a924c
files allpy/processors.py
diffstat 1 files changed, 73 insertions(+), 36 deletions(-) [+]
line diff
     1.1 --- a/allpy/processors.py	Fri May 20 13:35:09 2011 +0400
     1.2 +++ b/allpy/processors.py	Tue May 24 22:12:57 2011 +0400
     1.3 @@ -4,37 +4,6 @@
     1.4  import os
     1.5  from tempfile import NamedTemporaryFile
     1.6  
     1.7 -def sequence_id(sequence):
     1.8 -    return sequence.name + " " + sequence.description
     1.9 -
    1.10 -def fix_missing(old_block, new_block):
    1.11 -    """Add to new block sequences it is missing from old block.
    1.12 -
    1.13 -    New added sequences are empty.
    1.14 -    """
    1.15 -    new_sequence_names = map(sequence_id, new_block.sequences)
    1.16 -    for sequence in old_block.sequences:
    1.17 -        if sequence_id(sequence) not in new_sequence_names:
    1.18 -            clone = sequence.types.Sequence()
    1.19 -            # XXX: this copies descriptions but not contents
    1.20 -            # XXX: and that is exactly what we want
    1.21 -            vars(clone).update(vars(sequence))
    1.22 -            new_block.sequences.append(clone)
    1.23 -    return new_block
    1.24 -
    1.25 -def fix_ordering(old_block, new_block):
    1.26 -    """Reorder sequences in new_block the same way as in old_block.
    1.27 -
    1.28 -    Return new_block.
    1.29 -    """
    1.30 -    new_sequences = dict([ (sequence_id(sequence), sequence)
    1.31 -        for sequence in new_block.sequences
    1.32 -    ])
    1.33 -    new_block.sequences = []
    1.34 -    for sequence in old_block.sequences:
    1.35 -        new_block.sequences.append(new_sequences[sequence_id(sequence)])
    1.36 -    return new_block
    1.37 -
    1.38  class ExternalCommand(object):
    1.39      """Use external command to process block.
    1.40  
    1.41 @@ -57,7 +26,7 @@
    1.42          os.unlink(outfile.name)
    1.43          return new_alignment
    1.44  
    1.45 -class Muscle(ExternalCommand):
    1.46 +class _Muscle(ExternalCommand):
    1.47      """Realign block with muscle."""
    1.48  
    1.49      def __init__(self, remove_gaps=False):
    1.50 @@ -75,10 +44,12 @@
    1.51              _block.sequences = list(block.sequences)
    1.52              block = _block
    1.53              block.realign(Left())
    1.54 -        new_block = ExternalCommand.__call__(self, block)
    1.55 -        return fix_ordering(block, fix_missing(block, new_block))
    1.56 +        return ExternalCommand.__call__(self, block)
    1.57  
    1.58 -class Needle(ExternalCommand):
    1.59 +def Muscle(remove_gaps=False):
    1.60 +    return FixOrdering(BypassEmpty(_Muscle()))
    1.61 +
    1.62 +class _Needle(ExternalCommand):
    1.63      """Realign block with needle."""
    1.64  
    1.65      def __init__(self, **kwargs):
    1.66 @@ -102,10 +73,13 @@
    1.67                  'seq1': block.sequences[0].name,
    1.68                  'seq2': block.sequences[1].name,
    1.69              }
    1.70 -            return fix_ordering(block, ExternalCommand.__call__(self, block))
    1.71 +            return ExternalCommand.__call__(self, block)
    1.72          finally:
    1.73              self.command = orig_command
    1.74  
    1.75 +def Needle(**kwargs):
    1.76 +    return FixOrdering(_Needle(**kwargs))
    1.77 +
    1.78  class Flush(object):
    1.79      """Flush all non-gap monomers to one side."""
    1.80      def __call__(self, alignment):
    1.81 @@ -133,4 +107,67 @@
    1.82      def _padding_sizes(self, n_gaps):
    1.83          return n_gaps // 2, (n_gaps + 1) // 2
    1.84  
    1.85 +class BypassEmpty(object):
    1.86 +    """Metaprocessor: only apply processor to non-empty rows in alignment."""
    1.87 +
    1.88 +    def __init__(self, processor):
    1.89 +        self.processor = processor
    1.90 +
    1.91 +    def __call__(self, block):
    1.92 +        sequences = [
    1.93 +            row.sequence
    1.94 +            for row in block.rows_as_lists()
    1.95 +            if len(filter(None, row)) != 0
    1.96 +        ]
    1.97 +        in_block = block.types.Block.from_alignment(block, sequences=sequences)
    1.98 +        out_block = self.processor(in_block)
    1.99 +        sequence_set = set(sequences)
   1.100 +        for sequence in block.sequences:
   1.101 +            if sequence not in sequence_set:
   1.102 +                out_block.sequences.append(sequence)
   1.103 +        return out_block
   1.104 +
   1.105 +class FixOrdering(object):
   1.106 +    """Metaprocessor: fix sequence ordering output by processor."""
   1.107 +
   1.108 +    def __init__(self, processor, id=None):
   1.109 +        """Create the processor.
   1.110 +
   1.111 +        Arguments:
   1.112 +
   1.113 +        - `processor` -- a processor to fix ordering of
   1.114 +        - `id` -- a function mapping sequence to its identity,
   1.115 +            identity is some hashable value that does not change after applying
   1.116 +            the processor; e.g. it is sequence name or a pair of name and
   1.117 +            description; defaults to FixOrdering.id_by_name_description
   1.118 +        """
   1.119 +        if id is None:
   1.120 +            id = self.id_by_name_description
   1.121 +        self.processor = processor
   1.122 +        self.id = id
   1.123 +
   1.124 +    def __call__(self, block):
   1.125 +        out_block = self.processor(block)
   1.126 +        sequences = dict((
   1.127 +            (self.id(sequence), sequence)
   1.128 +            for sequence in out_block.sequences
   1.129 +        ))
   1.130 +        out_block.sequences = [
   1.131 +            sequences[self.id(old_sequence)]
   1.132 +            for old_sequence in block.sequences
   1.133 +        ]
   1.134 +        return out_block
   1.135 +
   1.136 +    @staticmethod
   1.137 +    def id_by_name_description(sequence):
   1.138 +        return (sequence.name, sequence.description)
   1.139 +
   1.140 +    @staticmethod
   1.141 +    def id_by_name(sequence):
   1.142 +        return sequence.name
   1.143 +
   1.144 +    @staticmethod
   1.145 +    def id_by_description(sequence):
   1.146 +        return sequence.description
   1.147 +
   1.148  # vim: set et ts=4 sts=4 sw=4: