Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/tip/allpy/processors.py
Дата изменения: Unknown
Дата индексирования: Tue Apr 12 07:56:06 2016
Кодировка:
allpy: b556c96c6719 allpy/processors.py

allpy

view allpy/processors.py @ 1168:b556c96c6719

blocks3d/www Makefile: never check certificates of github, they are too confusing for wget
author Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru>
date Mon, 26 May 2014 17:20:29 +0400
parents f05b08e13072
children
line source
1 """Processors for Alignment.process and Block.process.
2 """
4 import os
5 from tempfile import NamedTemporaryFile
6 from util import Silence
7 from copy import deepcopy
9 #
10 # External command processors: processors that hand the block over to some
11 # other program (via call to os.system) and read back that program's output.
12 #
class ExternalCommand(object):
    """Processor that delegates the work to an external shell command.

    The block is dumped to a temporary file (FASTA, per the project's
    convention), the command is run through ``os.system`` and the answer is
    read back from a second temporary file.  The command must write its
    answer as a FASTA file with the same ordering of sequences.

    `command` is a %-format template: ``%(infile)s`` and ``%(outfile)s`` are
    substituted with the paths of the two temporary files.
    """

    def __init__(self, command):
        self.command = command

    def __call__(self, block):
        """Run the command on `block`; return the alignment read from its output."""
        tmp_options = dict(prefix='allpy_processor_', delete=True)
        with NamedTemporaryFile(**tmp_options) as source:
            with NamedTemporaryFile('rb', **tmp_options) as target:
                block.to_file(source)
                # Push buffered data to disk before the command opens the file.
                source.flush()
                paths = dict(infile=source.name, outfile=target.name)
                os.system(self.command % paths)
                # Build the result with the alignment type bound to the block.
                return block.types.Alignment().append_file(target)
36 #
37 # Standard (re)alignment tools.
38 #
class _Muscle(ExternalCommand):
    """Realign block with the ``muscle`` command line program.

    Arguments:

    - remove_gaps -- when true, the block is first passed through `Left()`
      (all gaps pushed to the right-hand side) before it is realigned
    """

    def __init__(self, remove_gaps=False):
        ExternalCommand.__init__(self, 'muscle -in %(infile)s -out %(outfile)s')
        self.remove_gaps = remove_gaps

    def __call__(self, block):
        """Run muscle on `block` (optionally left-flushed) and return the result."""
        source = Left()(block) if self.remove_gaps else block
        # NOTE(review): Silence presumably hides muscle's console chatter --
        # confirm against util.Silence.
        with Silence(dup="stderr"):
            return ExternalCommand.__call__(self, source)
def Muscle(remove_gaps=False):
    """Realign block with Muscle.

    Accepted arguments:

    - `remove_gaps` -- make fresh alignment (remove gaps before aligning);
      otherwise, gaps are retained and muscle performs subalignment of
      an existing alignment.

    Returns the muscle processor wrapped in `BypassEmpty` and
    `FixNamesAndOrder`.
    """
    # Forward the flag: it used to be accepted here but silently dropped,
    # so `Muscle(remove_gaps=True)` behaved exactly like `Muscle()`.
    return FixNamesAndOrder(BypassEmpty(_Muscle(remove_gaps=remove_gaps)))
class _Needle(ExternalCommand):
    """Realign block with the EMBOSS ``needle`` command line program.

    Keyword arguments are turned into ``-name value`` command line options.
    If any option name contains ``end`` (e.g. ``endopen``, ``endextend``),
    ``endweight`` is forced to ``Y``.

    WARNING! This MUST be wrapped in `FixNamesAndOrder()`.
    """

    def __init__(self, **kwargs):
        # Test every option name on its own.  Joining all names into one
        # string first could produce a spurious "end" across the boundary of
        # two unrelated names (e.g. "gapopen" followed by "datafile").
        if any('end' in key for key in kwargs):
            kwargs['endweight'] = 'Y'
        # NOTE(review): ':1' / ':2' presumably select the first and second
        # entries of the input file -- confirm against the EMBOSS USA syntax,
        # since FixNamesAndOrder numbers sequence names starting from 0.
        kwargs['asequence'] = '%(infile)s:1'
        kwargs['bsequence'] = '%(infile)s:2'
        kwargs['outfile'] = '%(outfile)s'
        kwargs['aformat3'] = 'fasta'
        args = ['-%s %s' % (key, value) for key, value in kwargs.items()]
        cmdline = ' '.join(['needle', '-auto'] + args)
        ExternalCommand.__init__(self, cmdline)

    def __call__(self, block):
        """Run needle on a block of exactly two sequences; return the result."""
        assert len(block.sequences) == 2, "Needle needs exactly two sequences"
        with Silence(dup="stderr"):
            return ExternalCommand.__call__(self, block)
def Needle(**kwargs):
    """Build a processor that realigns a block with Needle.

    Keyword arguments are the same as the options accepted by the needle
    command line program; they are handed to `_Needle` unchanged.
    """
    aligner = _Needle(**kwargs)
    return FixNamesAndOrder(aligner)
99 #
100 # Trivial processors (move all gaps to one side)
class Flush(object):
    """Flush all non-gap monomers to one side.

    Base class: subclasses provide `_padding_sizes(n_gaps)`, which decides
    how many gaps go to the left and to the right of the monomers.
    """

    def __call__(self, alignment):
        """Return a new alignment whose rows have their gaps regrouped.

        Each row keeps its length and the relative order of its monomers;
        gaps (falsy entries) are replaced with `None` padding on the sides.
        """
        result = alignment.types.Alignment()
        for row in alignment.rows_as_lists():
            sequence = row.sequence
            # A real list is required: it is measured with len() and
            # concatenated below, which a lazy filter() object cannot do.
            no_gaps = [monomer for monomer in row if monomer]
            llen, rlen = self._padding_sizes(len(row) - len(no_gaps))
            row = [None] * llen + no_gaps + [None] * rlen
            result.append_row_with_gaps(row, sequence)
        return result

    def _padding_sizes(self, n_gaps):
        """Return `(left, right)` gap counts for a row with `n_gaps` gaps."""
        raise NotImplementedError("subclasses must define _padding_sizes()")
class Left(Flush):
    """Push every non-gap monomer to the left; all gaps end up on the right."""

    def _padding_sizes(self, n_gaps):
        # No padding before the monomers, everything after them.
        return (0, n_gaps)
class Right(Flush):
    """Push every non-gap monomer to the right; all gaps end up on the left."""

    def _padding_sizes(self, n_gaps):
        # Everything before the monomers, no padding after them.
        return (n_gaps, 0)
class Center(Flush):
    """Place non-gap monomers in the middle of the row."""

    def _padding_sizes(self, n_gaps):
        # With an odd number of gaps the extra one goes to the right side.
        left = n_gaps // 2
        return left, n_gaps - left
131 # Helper processors that are combined with other processors to work
class BypassEmpty(object):
    """Metaprocessor: only apply processor to non-empty rows in alignment.

    Sequences whose rows contain nothing but gaps are withheld from the
    wrapped processor and appended to the sequence list of its result.
    """

    def __init__(self, processor):
        self.processor = processor

    def __call__(self, block):
        """Process the non-empty part of `block`; return the processor's result."""
        # any() rather than len(filter(...)): the latter needs filter() to
        # return a list and breaks where it returns a lazy iterator.
        sequences = [
            row.sequence
            for row in block.rows_as_lists()
            if any(row)
        ]
        in_block = block.types.Block.from_alignment(block, sequences=sequences)
        out_block = self.processor(in_block)
        sequence_set = set(sequences)
        # Put back the sequences that were withheld from the processor.
        for sequence in block.sequences:
            if sequence not in sequence_set:
                out_block.sequences.append(sequence)
        return out_block
class FixOrdering(object):
    """Metaprocessor: put the processor's output sequences back in input order."""

    def __init__(self, processor, id=None):
        """Wrap `processor`.

        Arguments:

        - `processor` -- the processor whose output ordering must be fixed
        - `id` -- a function mapping a sequence to its identity: a hashable
          value that survives the processor unchanged, e.g. the sequence
          name or a (name, description) pair; defaults to
          FixOrdering.id_by_name_description
        """
        self.processor = processor
        self.id = self.id_by_name_description if id is None else id

    def __call__(self, block):
        """Apply the processor and reorder its sequences to match `block`."""
        out_block = self.processor(block)
        # Index the output sequences by identity.
        by_identity = {}
        for sequence in out_block.sequences:
            by_identity[self.id(sequence)] = sequence
        assert len(by_identity) == len(block.sequences), \
            "Sequence identificators must be unique!"
        # Walk the input order and pick the matching output sequence.
        reordered = []
        for old_sequence in block.sequences:
            reordered.append(by_identity[self.id(old_sequence)])
        out_block.sequences = reordered
        return out_block

    @staticmethod
    def id_by_name_description(sequence):
        """Identity: the (name, description) pair."""
        return (sequence.name, sequence.description)

    @staticmethod
    def id_by_name(sequence):
        """Identity: the sequence name only."""
        return sequence.name

    @staticmethod
    def id_by_description(sequence):
        """Identity: the sequence description only."""
        return sequence.description
class FixNamesAndOrder(object):
    """Metaprocessor: avoid problems with sequence names and sequence order.

    The processor is run on a deep copy of the block in which every sequence
    name is replaced with its position number and every description is
    blanked.  The output is then sorted by those numbers.  Original names and
    descriptions are not restored on the result, since the caller will only
    rely on sequence order.
    """

    def __init__(self, processor):
        self.processor = processor

    def __call__(self, block):
        """Process a renamed copy of `block`; return output in input order."""
        # Work on a copy so that the caller's sequences keep their names.
        renamed = deepcopy(block)
        for index, sequence in enumerate(renamed.sequences):
            sequence.name, sequence.description = str(index), ""
        result = self.processor(renamed)
        result.sequences.sort(key=self._numeric_name)
        return result

    @staticmethod
    def _numeric_name(sequence):
        """Sort key: the position number stored in the sequence name."""
        return int(sequence.name)
219 # vim: set et ts=4 sts=4 sw=4: