Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/ca0b757452da
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 00:40:08 2012
Кодировка:
allpy: ca0b757452da

allpy

changeset 1096:ca0b757452da

Added meta-processor `FixNamesAndOrder`; called from `Needle` and `Muscle` (closes #134). Processors `Needle()` and `Muscle()` can now deal with all kinds of stuff in sequence names: spaces, colons, duplicate sequence names. For readers of #134: please note that this commit actually does exactly __nothing__ with the `ExternalCommand` processor itself; rather, it adds a new meta-processor to be applied to it or any other processor.
author Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru>
date Sun, 03 Jun 2012 12:09:41 +0400
parents f05b08e13072
children 8d5388980e57
files allpy/processors.py test/test_realign.py
diffstat 2 files changed, 36 insertions(+), 30 deletions(-) [+]
line diff
     1.1 --- a/allpy/processors.py	Sat Jun 02 22:33:50 2012 +0400
     1.2 +++ b/allpy/processors.py	Sun Jun 03 12:09:41 2012 +0400
     1.3 @@ -4,6 +4,7 @@
     1.4  import os
     1.5  from tempfile import NamedTemporaryFile
     1.6  from util import Silence
     1.7 +from copy import deepcopy
     1.8  
     1.9  #
    1.10  # External command processors: processors that pass their output to some
    1.11 @@ -64,48 +65,36 @@
    1.12          otherwise, gaps are retained and muscle performs subalignment of
    1.13          an existing alignment.
    1.14      """
    1.15 -    return FixOrdering(BypassEmpty(_Muscle()))
    1.16 +    return FixNamesAndOrder(BypassEmpty(_Muscle()))
    1.17  
    1.18  class _Needle(ExternalCommand):
    1.19 -    """Realign block with needle."""
    1.20 +    """Realign block with needle.
    1.21 +
    1.22 +    WARNING! This MUST be wrapped in `FixNamesAndOrder()`.
    1.23 +    """
    1.24  
    1.25      def __init__(self, **kwargs):
    1.26          if 'end' in ''.join(kwargs.keys()):
    1.27              kwargs['endweight'] = 'Y'
    1.28 -        kwargs['asequence'] = '%%(infile)s:%(seq1)s'
    1.29 -        kwargs['bsequence'] = '%%(infile)s:%(seq2)s'
    1.30 -        kwargs['outfile'] = '%%(outfile)s'
    1.31 +        kwargs['asequence'] = '%(infile)s:1'
    1.32 +        kwargs['bsequence'] = '%(infile)s:2'
    1.33 +        kwargs['outfile'] = '%(outfile)s'
    1.34          kwargs['aformat3'] = 'fasta'
    1.35          args = ['-%s %s' % (key, value) for key, value in kwargs.items()]
    1.36          cmdline = ' '.join(['needle', '-auto'] + args)
    1.37          ExternalCommand.__init__(self, cmdline)
    1.38  
    1.39 -    def _check_sequence(self, sequence):
    1.40 -        assert sequence.name, "Needle does not allow empty sequence names"
    1.41 -        assert ":" not in sequence.name, \
    1.42 -            "Needle does not allow : in sequence names"
    1.43 -
    1.44      def __call__(self, block):
    1.45          assert len(block.sequences) == 2, "Needle needs exactly two sequences"
    1.46 -        self._check_sequence(block.sequences[0])
    1.47 -        self._check_sequence(block.sequences[1])
    1.48 -        try:
    1.49 -            orig_command = self.command
    1.50 -            self.command = self.command % {
    1.51 -                'seq1': block.sequences[0].name,
    1.52 -                'seq2': block.sequences[1].name,
    1.53 -            }
    1.54 -            with Silence(dup="stderr"):
    1.55 -                return ExternalCommand.__call__(self, block)
    1.56 -        finally:
    1.57 -            self.command = orig_command
    1.58 +        with Silence(dup="stderr"):
    1.59 +            return ExternalCommand.__call__(self, block)
    1.60  
    1.61  def Needle(**kwargs):
    1.62      """Realign block with Needle.
    1.63  
    1.64      Arguments are the same as accepted by needle command line program.
    1.65      """
    1.66 -    return FixOrdering(_Needle(**kwargs))
    1.67 +    return FixNamesAndOrder(_Needle(**kwargs))
    1.68  
    1.69  #
    1.70  # Trivial processors (move all gaps to one side)
    1.71 @@ -207,4 +196,24 @@
    1.72      def id_by_description(sequence):
    1.73          return sequence.description
    1.74  
    1.75 +class FixNamesAndOrder(object):
    1.76 +    """Metaprocessor: avoid problems with sequence names and sequence order.
    1.77 +
    1.78 +    Temporarily replace sequence names with numbers while the processor runs.
    1.79 +    Don't bother restoring sequence names/descriptions, since the caller will
    1.80 +    only rely on sequence order.
    1.81 +    """
    1.82 +
    1.83 +    def __init__(self, processor):
    1.84 +        self.processor = processor
    1.85 +
    1.86 +    def __call__(self, block):
    1.87 +        in_block = deepcopy(block)
    1.88 +        for n, sequence in enumerate(in_block.sequences):
    1.89 +            sequence.name = str(n)
    1.90 +            sequence.description = ""
    1.91 +        out_block = self.processor(in_block)
    1.92 +        out_block.sequences.sort(key=lambda s: int(s.name))
    1.93 +        return out_block
    1.94 +
    1.95  # vim: set et ts=4 sts=4 sw=4:
     2.1 --- a/test/test_realign.py	Sat Jun 02 22:33:50 2012 +0400
     2.2 +++ b/test/test_realign.py	Sun Jun 03 12:09:41 2012 +0400
     2.3 @@ -36,21 +36,18 @@
     2.4  		append_row_from_string("rrrr", name="a").
     2.5  		append_row_from_string("rrrr", name="a"))
     2.6  
     2.7 -@raises(AssertionError)
     2.8  def test_muscle_duplicate():
     2.9 -	"""`muscle` must be unable to deal with alignments with non-unique ids"""
    2.10 +	"""`muscle` must be able to deal with alignments with non-unique ids"""
    2.11  	example2.realign(processors.Muscle())
    2.12  
    2.13 -@raises(AssertionError)
    2.14 -def test_muscle_duplicate():
    2.15 -	"""`needle` must be unable to deal with alignments with non-unique ids"""
    2.16 +def test_needle_duplicate():
    2.17 +	"""`needle` must be able to deal with alignments with non-unique ids"""
    2.18  	example2.realign(processors.Needle())
    2.19  
    2.20  example3 = (protein.Alignment().
    2.21  		append_row_from_string("n", name="xxx").
    2.22  		append_row_from_string("m", name=""))
    2.23  
    2.24 -@raises(AssertionError)
    2.25  def test_needle_empty():
    2.26 -	"""`needle` must be unable to deal with sequences with empty name"""
    2.27 +	"""`needle` must be able to deal with sequences with empty name"""
    2.28  	example3.realign(processors.Needle())