allpy

changeset 928:b0029b64ef06
Automated merge with ssh://kodomo/allpy
author: Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru>
date: Wed, 09 Nov 2011 00:49:31 +0300
parents: 994bd893504c b8ddd35fac65
children: 73422710e06e
diffstat: 5 files changed, 91 insertions(+), 9 deletions(-) [+]
[-]

allpy/fileio/auto.py 3

allpy/fileio/fastq.py 47

allpy/markups.py 3

allpy/processors.py 20

test/test_fastq.py 27
     1.1 --- a/allpy/fileio/auto.py	Mon Nov 07 18:20:12 2011 +0300
     1.2 +++ b/allpy/fileio/auto.py	Wed Nov 09 00:49:31 2011 +0300
     1.3 @@ -1,4 +1,5 @@
     1.4  import fasta
     1.5 +import fastq
     1.6  import markup
     1.7  import bio
     1.8  import emboss
     1.9 @@ -8,6 +9,8 @@
    1.10      def __new__(cls, file, format="fasta", **kw):
    1.11          if format == "fasta":
    1.12              return fasta.FastaFile(file, **kw)
    1.13 +        elif format == "fastq":
    1.14 +            return fastq.FastqFile(file, **kw)
    1.15          elif format == 'markup':
    1.16              return markup.MarkupFile(file, **kw)
    1.17          elif format.startswith('markup:'):

     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/allpy/fileio/fastq.py	Wed Nov 09 00:49:31 2011 +0300
     2.3 @@ -0,0 +1,47 @@
     2.4 +class FastqFile(object):
     2.5 +    """FastQ parser & writer."""
     2.6 +
     2.7 +    def __init__(self, file, format='fasta', gaps=None):
     2.8 +        # NOTE: 'gaps' argument is ignored!
     2.9 +        self.file = file
    2.10 +        self.format = format
    2.11 +        self.phred_base = 33
    2.12 +        try:
    2.13 +            self.source = file.name
    2.14 +        except Exception:
    2.15 +            self.source = ''
    2.16 +
    2.17 +    def write_alignment(self, alignment):
    2.18 +        """Append alignment to the file."""
    2.19 +        write = self.file.write
    2.20 +        for row in alignment.rows_as_lists():
    2.21 +            header = '%s %s' % (row.sequence.name, row.sequence.description)
    2.22 +            chars = "".join(monomer.code1 for monomer in row)
    2.23 +            phred = "".join(
    2.24 +                chr(monomer.quality + self.phred_base)
    2.25 +                for monomer in row
    2.26 +            )
    2.27 +            write('@%s\n%s\n+\n%s\n' % (header, row, phred))
    2.28 +
    2.29 +    def read_alignment(self, alignment):
    2.30 +        """Append file contents to the alignment."""
    2.31 +        for n, line in enumerate(self.file):
    2.32 +            n = n % 4
    2.33 +            line = line.rstrip('\n')
    2.34 +            if n == 0:
    2.35 +                assert line[0] == '@'
    2.36 +                name, description = line.split(' ', 1)
    2.37 +            elif n == 1:
    2.38 +                # XXX: this is ugly!
    2.39 +                # XXX append_row_from_string should return the new Sequence
    2.40 +                alignment.append_row_from_string(line, name, description, self.source, [])
    2.41 +                sequence = alignment.sequences[-1]
    2.42 +            elif n == 2:
    2.43 +                assert line[0] == '+'
    2.44 +            elif n == 3:
    2.45 +                assert len(sequence) == len(line)
    2.46 +                quality = sequence.add_markup('quality')
    2.47 +                for monomer, phred in zip(sequence, line):
    2.48 +                    monomer.quality = ord(phred) - self.phred_base
    2.49 +
    2.50 +# vim: set et ts=4 sts=4 sw=4:

     3.1 --- a/allpy/markups.py	Mon Nov 07 18:20:12 2011 +0300
     3.2 +++ b/allpy/markups.py	Wed Nov 09 00:49:31 2011 +0300
     3.3 @@ -152,6 +152,9 @@
     3.4              return monomer.code1.lower()
     3.5          raise AssertionError("Unknown monomer case")
     3.6  
     3.7 +class SequenceQualityMarkup(base.SequenceMarkup, IntMarkupMixin):
     3.8 +    name = 'quality'
     3.9 +
    3.10  class SequencePdbResiMarkup(base.SequenceMarkup, IntMarkupMixin):
    3.11      name = 'pdb_resi'
    3.12  

     4.1 --- a/allpy/processors.py	Mon Nov 07 18:20:12 2011 +0300
     4.2 +++ b/allpy/processors.py	Wed Nov 09 00:49:31 2011 +0300
     4.3 @@ -20,15 +20,17 @@
     4.4          self.command = command
     4.5  
     4.6      def __call__(self, block):
     4.7 -        infile = NamedTemporaryFile(prefix='allpy_processor_', delete=False)
     4.8 -        outfile = NamedTemporaryFile('r', prefix='allpy_processor_', delete=False)
     4.9 -        block.to_file(infile)
    4.10 -        infile.close()
    4.11 -        os.system(self.command % {'infile': infile.name, 'outfile': outfile.name})
    4.12 -        Alignment = block.__class__
    4.13 -        new_alignment = Alignment().append_file(outfile)
    4.14 -        os.unlink(infile.name)
    4.15 -        os.unlink(outfile.name)
    4.16 +        kwargs = {'prefix': 'allpy_processor_', 'delete': False}
    4.17 +        with NamedTemporaryFile(**kwargs) as infile:
    4.18 +            with NamedTemporaryFile('r', **kwargs) as outfile:
    4.19 +                block.to_file(infile)
    4.20 +                infile.close()
    4.21 +                subst_vars = {'infile': infile.name, 'outfile': outfile.name}
    4.22 +                os.system(self.command % subst_vars)
    4.23 +                Alignment = block.__class__
    4.24 +                new_alignment = Alignment().append_file(outfile)
    4.25 +                os.unlink(infile.name)
    4.26 +                os.unlink(outfile.name)
    4.27          return new_alignment
    4.28  
    4.29  #

     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/test/test_fastq.py	Wed Nov 09 00:49:31 2011 +0300
     5.3 @@ -0,0 +1,27 @@
     5.4 +from StringIO import StringIO
     5.5 +from allpy import dna
     5.6 +
     5.7 +example = """
     5.8 +@HWI-ST992:108:D06T0ACXX:6:1107:13562:94150 1:N:0:
     5.9 +TCAATGGTGCTGTATAAAACACGTAGAATTACTATTATGGATTATATTCGTCTTCATTTGTTAAAATCAGTGGCTTAACTGGCTAAATTTTGTGGACTATA
    5.10 ++
    5.11 +#8::4AD,A+C<D>EDEIEDFIE:FF<A@B+1??999E?<DDE<9:DADBD;D=8BD?B@B=B)===)7=..)-66@CCD)?=A;@?@AAAB>;(>AAAA>
    5.12 +@HWI-ST992:108:D06T0ACXX:6:1107:13630:94150 1:N:0:
    5.13 +GGTGAAGCGCTTTGAGTCCCTCTCCAGCCAGCGCTTCCTGCAGGGCGTGATCGTGGCGCTCATCACGGGCATGTTCTGGTGGCAGCGCGGCCGCGGCGGCT
    5.14 ++
    5.15 +<@@DDFDBHHGFHIAHHIIIIIIIIIGHGIGAGHIHIIIIHIIII?G@BHHHHABDDDDDDDDDDDD?B@BCD@C@CDDACDDDDDDDDDBDBDBDD@B<>
    5.16 +""".strip()
    5.17 +
    5.18 +def test_fastq_io():
    5.19 +    file = StringIO(example)
    5.20 +    aln = dna.Alignment().append_file(file, format="fastq")
    5.21 +    assert len(aln.sequences) == 2
    5.22 +    a, b = aln.sequences
    5.23 +    assert str(a)[:10] == "TCAATGGTGC"
    5.24 +    assert str(b)[:10] == "GGTGAAGCGC"
    5.25 +    qa = list(a.markups['quality'].sorted_values())
    5.26 +    assert qa[:10] == [2, 23, 25, 25, 19, 32, 35, 11, 32, 10]
    5.27 +    qb = list(b.markups['quality'].sorted_values())
    5.28 +    assert qb[:10] == [27, 31, 31, 35, 35, 37, 35, 33, 39, 39]
    5.29 +
    5.30 +# vim: set et ts=4 sts=4 sw=4: