allpy
changeset 928:b0029b64ef06
Automated merge with ssh://kodomo/allpy
author | Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru> |
---|---|
date | Wed, 09 Nov 2011 00:49:31 +0300 |
parents | 994bd893504c b8ddd35fac65 |
children | 73422710e06e |
files | |
diffstat | 5 files changed, 91 insertions(+), 9 deletions(-) [+] |
line diff
1.1 --- a/allpy/fileio/auto.py Mon Nov 07 18:20:12 2011 +0300 1.2 +++ b/allpy/fileio/auto.py Wed Nov 09 00:49:31 2011 +0300 1.3 @@ -1,4 +1,5 @@ 1.4 import fasta 1.5 +import fastq 1.6 import markup 1.7 import bio 1.8 import emboss 1.9 @@ -8,6 +9,8 @@ 1.10 def __new__(cls, file, format="fasta", **kw): 1.11 if format == "fasta": 1.12 return fasta.FastaFile(file, **kw) 1.13 + elif format == "fastq": 1.14 + return fastq.FastqFile(file, **kw) 1.15 elif format == 'markup': 1.16 return markup.MarkupFile(file, **kw) 1.17 elif format.startswith('markup:'):
class FastqFile(object):
    """FastQ parser & writer.

    Handles four-line FastQ records::

        @name description
        SEQUENCE
        +
        QUALITY

    Quality characters are converted to and from integer scores by
    subtracting/adding ``phred_base`` (33) to the character code.
    """

    def __init__(self, file, format='fasta', gaps=None):
        """Wrap an open file-like object.

        file -- object to read lines from or write text to
        format -- stored on the instance; not otherwise used by this class
        gaps -- accepted for signature compatibility only
        """
        # NOTE: 'gaps' argument is ignored!
        self.file = file
        self.format = format
        self.phred_base = 33
        # In-memory files have no name; fall back to an empty source.
        self.source = getattr(file, 'name', '')

    def write_alignment(self, alignment):
        """Append alignment to the file, one FastQ record per row."""
        write = self.file.write
        for row in alignment.rows_as_lists():
            sequence = row.sequence
            header = sequence.name
            if sequence.description:
                header = '%s %s' % (sequence.name, sequence.description)
            # NOTE(review): rows presumably hold None in gap columns --
            # confirm against rows_as_lists(). FastQ has no gap notation,
            # so such entries are left out of the record.
            monomers = [monomer for monomer in row if monomer is not None]
            chars = "".join(monomer.code1 for monomer in monomers)
            phred = "".join(
                chr(monomer.quality + self.phred_base)
                for monomer in monomers
            )
            # Write the residue string, not the row object itself.
            write('@%s\n%s\n+\n%s\n' % (header, chars, phred))

    def read_alignment(self, alignment):
        """Append file contents to the alignment.

        Every record adds one row to the alignment and stores the decoded
        quality of each position in the ``quality`` attribute of the
        corresponding monomer.

        Raises ValueError if a record is malformed or the file ends in the
        middle of a record.
        """
        field = 0  # position inside the current 4-line record
        name = description = ''
        sequence = None
        for lineno, line in enumerate(self.file, 1):
            line = line.rstrip('\r\n')
            if field == 0:
                if not line.strip():
                    # Tolerate blank lines between records and at EOF.
                    continue
                if not line.startswith('@'):
                    raise ValueError(
                        "%s:%d: FastQ header must start with '@'"
                        % (self.source, lineno))
                # Drop the '@' marker; the description is optional.
                name, _, description = line[1:].partition(' ')
            elif field == 1:
                # append_row_from_string does not hand back the sequence
                # it created, so fetch it from the alignment afterwards.
                alignment.append_row_from_string(
                    line, name, description, self.source, [])
                sequence = alignment.sequences[-1]
            elif field == 2:
                if not line.startswith('+'):
                    raise ValueError(
                        "%s:%d: FastQ separator must start with '+'"
                        % (self.source, lineno))
            else:
                if len(sequence) != len(line):
                    raise ValueError(
                        "%s:%d: quality length %d does not match "
                        "sequence length %d"
                        % (self.source, lineno, len(line), len(sequence)))
                sequence.add_markup('quality')
                for monomer, phred in zip(sequence, line):
                    monomer.quality = ord(phred) - self.phred_base
            field = (field + 1) % 4
        if field != 0:
            raise ValueError(
                "%s: FastQ file ends in the middle of a record"
                % (self.source,))

# vim: set et ts=4 sts=4 sw=4:
3.1 --- a/allpy/markups.py Mon Nov 07 18:20:12 2011 +0300 3.2 +++ b/allpy/markups.py Wed Nov 09 00:49:31 2011 +0300 3.3 @@ -152,6 +152,9 @@ 3.4 return monomer.code1.lower() 3.5 raise AssertionError("Unknown monomer case") 3.6 3.7 +class SequenceQualityMarkup(base.SequenceMarkup, IntMarkupMixin): 3.8 + name = 'quality' 3.9 + 3.10 class SequencePdbResiMarkup(base.SequenceMarkup, IntMarkupMixin): 3.11 name = 'pdb_resi' 3.12
4.1 --- a/allpy/processors.py Mon Nov 07 18:20:12 2011 +0300 4.2 +++ b/allpy/processors.py Wed Nov 09 00:49:31 2011 +0300 4.3 @@ -20,15 +20,17 @@ 4.4 self.command = command 4.5 4.6 def __call__(self, block): 4.7 - infile = NamedTemporaryFile(prefix='allpy_processor_', delete=False) 4.8 - outfile = NamedTemporaryFile('r', prefix='allpy_processor_', delete=False) 4.9 - block.to_file(infile) 4.10 - infile.close() 4.11 - os.system(self.command % {'infile': infile.name, 'outfile': outfile.name}) 4.12 - Alignment = block.__class__ 4.13 - new_alignment = Alignment().append_file(outfile) 4.14 - os.unlink(infile.name) 4.15 - os.unlink(outfile.name) 4.16 + kwargs = {'prefix': 'allpy_processor_', 'delete': False} 4.17 + with NamedTemporaryFile(**kwargs) as infile: 4.18 + with NamedTemporaryFile('r', **kwargs) as outfile: 4.19 + block.to_file(infile) 4.20 + infile.close() 4.21 + subst_vars = {'infile': infile.name, 'outfile': outfile.name} 4.22 + os.system(self.command % subst_vars) 4.23 + Alignment = block.__class__ 4.24 + new_alignment = Alignment().append_file(outfile) 4.25 + os.unlink(infile.name) 4.26 + os.unlink(outfile.name) 4.27 return new_alignment 4.28 4.29 #
"""Test reading FastQ data into a DNA alignment."""
from StringIO import StringIO
from allpy import dna

# Two complete four-line FastQ records used as parser input.
example = """
@HWI-ST992:108:D06T0ACXX:6:1107:13562:94150 1:N:0:
TCAATGGTGCTGTATAAAACACGTAGAATTACTATTATGGATTATATTCGTCTTCATTTGTTAAAATCAGTGGCTTAACTGGCTAAATTTTGTGGACTATA
+
#8::4AD,A+C<D>EDEIEDFIE:FF<A@B+1??999E?<DDE<9:DADBD;D=8BD?B@B=B)===)7=..)-66@CCD)?=A;@?@AAAB>;(>AAAA>
@HWI-ST992:108:D06T0ACXX:6:1107:13630:94150 1:N:0:
GGTGAAGCGCTTTGAGTCCCTCTCCAGCCAGCGCTTCCTGCAGGGCGTGATCGTGGCGCTCATCACGGGCATGTTCTGGTGGCAGCGCGGCCGCGGCGGCT
+
<@@DDFDBHHGFHIAHHIIIIIIIIIGHGIGAGHIHIIIIHIIII?G@BHHHHABDDDDDDDDDDDD?B@BCD@C@CDDACDDDDDDDDDBDBDBDD@B<>
""".strip()

def test_fastq_io():
    """Load the example and check the first ten bases and qualities."""
    alignment = dna.Alignment().append_file(StringIO(example), format="fastq")
    sequences = alignment.sequences
    assert len(sequences) == 2
    first, second = sequences

    # Leading residues of each read.
    for sequence, prefix in ((first, "TCAATGGTGC"), (second, "GGTGAAGCGC")):
        assert str(sequence)[:10] == prefix

    # Leading values of the 'quality' markup of each read.
    expected_scores = (
        (first, [2, 23, 25, 25, 19, 32, 35, 11, 32, 10]),
        (second, [27, 31, 31, 35, 35, 37, 35, 33, 39, 39]),
    )
    for sequence, scores in expected_scores:
        values = list(sequence.markups['quality'].sorted_values())
        assert values[:10] == scores

# vim: set et ts=4 sts=4 sw=4: