Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/a1307c0bb030/allpy/fileio.py
Дата изменения: Unknown
Дата индексирования: Mon Feb 4 03:43:24 2013
Кодировка:
allpy: a1307c0bb030 allpy/fileio.py

allpy

view allpy/fileio.py @ 642:a1307c0bb030

Added necessary hooks for monomer pickling [closes #35]. The current implementation is lazy and does not store all monomer classes explicitly in a module; they are still generated on the fly. According to the PDB database, some monomer classes share the same name. To avoid name clashes, we append underscores to a class name if a class with the same name already exists. WARNING: this may and WILL cause trouble if such clashes occur between different types of monomers, in which case different names will be generated for the same class depending on the order in which modules are loaded. The only example of such a clash in the current database is the DNA monomer "0AV" and the RNA monomer "A2M", which both have the name "2'-O-METHYLADENOSINE 5'-(DIHYDROGEN PHOSPHATE)".
author Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru>
date Fri, 03 Jun 2011 16:49:44 +0400
parents b6a53615f1e9
children 80043822a41e
line source
1 import os
2 from subprocess import Popen, PIPE
3 from tempfile import NamedTemporaryFile
4 import util
class File(object):
    """Factory that picks a reader/writer object for the requested format.

    Instantiating ``File(file, format)`` never produces a ``File`` instance:
    ``__new__`` hands back a ``FastaFile`` for the "fasta" format and an
    ``EmbossFile`` for every other format name.
    """

    def __new__(cls, file, format="fasta"):
        # Any non-fasta format is delegated to the EMBOSS-backed handler,
        # which receives the format name unchanged.
        if format != "fasta":
            return EmbossFile(file, format)
        return FastaFile(file)
class FastaFile(object):
    """Fasta parser & writer.

    Wraps an already opened file-like object. Writing needs ``write`` and
    ``flush`` on that object; reading needs ``read``.

    NOTE: the tuple layouts of the two directions differ. ``write_strings``
    consumes ``(string, name, description)`` while ``read_strings`` yields
    ``(name, description, body)``.
    """

    def __init__(self, file, wrap_column=70):
        """Remember the target file and the line width for sequence bodies.

        A false ``wrap_column`` (0 or None) disables wrapping.
        """
        self.file = file
        self.wrap_column = wrap_column

    def write_string(self, string, name, description=''):
        """Append one sequence to file.

        The header line is ``>name`` or ``>name description``. The sequence
        body follows, cut into lines of at most ``wrap_column`` characters
        when wrapping is enabled. The file is flushed afterwards.
        """
        header = name
        if description:
            header += " " + description
        self.file.write(">%s\n" % header)
        if self.wrap_column:
            width = self.wrap_column
            # Step through the string by index instead of re-slicing the
            # remaining tail on every line, which copied the whole rest of
            # the sequence each time (quadratic for long sequences).
            for start in range(0, len(string), width):
                self.file.write(string[start:start + width] + "\n")
        else:
            self.file.write(string + "\n")
        self.file.flush()

    def write_strings(self, sequences):
        """Write sequences to file.

        Sequences are given as an iterable of tuples
        (string, name, description).
        """
        for string, name, description in sequences:
            self.write_string(string, name, description)

    def read_strings(self):
        """Yield (name, description, body) for every record in the file.

        The whole file is read at once and split on lines starting with
        ``>``. The name is the header text up to the first space; the rest
        of the header is the description. Chunks that contain nothing but
        whitespace (empty file, leading blank lines) are skipped instead of
        being reported as a record with an empty name.
        """
        for part in self.file.read().split("\n>"):
            if not part.strip():
                continue
            header, _, body = part.partition("\n")
            # Only the very first record still carries its leading ">".
            header = header.lstrip(">")
            name, _, description = header.partition(" ")
            name = name.strip()
            description = description.strip()
            # util.remove_each is a project helper; presumably it deletes
            # every listed whitespace character from the body.
            body = util.remove_each(body, " \n\r\t\v")
            yield (name, description, body)
class EmbossFile(object):
    """Parser & writer for file formats supported by EMBOSS.

    Conversion is delegated to the external ``seqret`` program, with FASTA
    as the intermediate format on this side. The public methods mirror
    ``FastaFile``: ``write_strings`` takes (string, name, description)
    tuples and ``read_strings`` yields whatever ``FastaFile.read_strings``
    yields.
    """

    def __init__(self, file, format):
        """Remember the file object and the EMBOSS format name."""
        self.file = file
        self.format = format

    def write_strings(self, sequences):
        """Write sequences to file.

        Sequences are given as an iterable of tuples
        (string, name, description).
        """
        # Stage the FASTA input in a temporary file rather than feeding the
        # child's stdin pipe directly: writing all input before reading any
        # output can deadlock once both pipe buffers are full.
        with NamedTemporaryFile() as fasta:
            FastaFile(fasta).write_strings(self.fix_sequences(sequences))
            fasta.flush()
            fasta.seek(0)
            pipe = Popen(['seqret', 'stdin', '%s::stdout' % self.format],
                stdin=fasta, stdout=PIPE
            )
            try:
                for line in pipe.stdout:
                    self.file.write(line)
            finally:
                # Reap the child so it does not linger as a zombie.
                pipe.stdout.close()
                pipe.wait()

    def fix_sequences(self, sequences):
        """EMBOSS does not permit : in sequence names. Fix sequences for that.

        Takes and yields (string, name, description) tuples, the layout
        ``FastaFile.write_strings`` expects; only the name is altered.
        """
        for string, name, description in sequences:
            yield string, name.replace(':', '_'), description

    def read_strings(self):
        """Read sequences from file.

        ``seqret`` is started immediately; the returned iterator yields the
        records parsed from its FASTA output.
        """
        # Same deadlock concern as in write_strings: copy the input to a
        # temporary file so the child can read it at its own pace.
        with NamedTemporaryFile() as source:
            for line in self.file:
                source.write(line)
            source.flush()
            source.seek(0)
            pipe = Popen(['seqret', '%s::stdin' % self.format, 'stdout'],
                stdin=source, stdout=PIPE
            )
        return self._parse_and_reap(pipe)

    def _parse_and_reap(self, pipe):
        """Yield FASTA records from pipe.stdout, then wait for the child."""
        try:
            for record in FastaFile(pipe.stdout).read_strings():
                yield record
        finally:
            pipe.stdout.close()
            pipe.wait()
84 # vim: set et ts=4 sts=4 sw=4: