Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/6ad74e2ba3d2/allpy/fasta.py
Дата изменения: Unknown
Дата индексирования: Sun Feb 3 23:23:38 2013
Кодировка:
allpy: 6ad74e2ba3d2 allpy/fasta.py

allpy

view allpy/fasta.py @ 330:6ad74e2ba3d2

Fixed bug: fasta reader broke on files with Windows EOLs
author Daniil Alexeyevsky <me.dendik@gmail.com>
date Fri, 17 Dec 2010 20:54:35 +0300
parents 01547d8d5c36
children 6190ad032194
line source
1 import util
def parse_file(file):
    """Parse fasta file, remove spaces and newlines from sequence bodies.

    The whole stream is read at once and split into records at every line
    that starts with ">". The first line of a record is its header: the text
    before the first space is the name, the remainder is the description.
    Every following line belongs to the sequence body, from which spaces,
    tabs, vertical tabs, and both "\\n" and "\\r" line-ending characters are
    removed, so files with Windows EOLs parse the same as Unix ones.

    Chunks that contain nothing but whitespace (an empty file, or blank
    lines before the first header) are skipped instead of being reported as
    an empty record.

    Return a list of tuples (name, description, sequence_body).
    """
    sequences = []
    for part in file.read().split("\n>"):
        # Nothing but whitespace: not a record, so do not emit ("", "", "").
        if not part.strip():
            continue
        header, _, body = part.partition("\n")
        # The very first record still carries its leading ">"; strip() also
        # drops the "\r" left over from a Windows line ending.
        header = header.lstrip(">").strip()
        name, _, description = header.partition(" ")
        name = name.strip()
        description = description.strip()
        # Remove every whitespace character that may separate or pad the
        # sequence lines.
        for char in " \n\r\t\v":
            body = body.replace(char, "")
        sequences.append((name, description, body))
    return sequences
def save_file(out_file, string, name, description='', long_line=70):
    """Save the given string to out_file in fasta format.

    Writes a header line ">name" (or ">name description" when description
    is non-empty) followed by the sequence.

    Splits long lines to substrings of length=long_line.
    To prevent this, set long_line=None; the sequence is then written as a
    single line.

    When wrapping is enabled, exactly one line is written per chunk, so a
    sequence whose length is a multiple of long_line (including an empty
    sequence) is not followed by a spurious blank line.
    """
    if description:
        name += " " + description
    out_file.write(">%s\n" % name)
    if long_line:
        # Iterate over chunk start offsets rather than a chunk count: a
        # count of len // long_line + 1 yields one empty extra chunk whenever
        # the length divides evenly.
        for start in range(0, len(string), long_line):
            # The space before the newline is part of the existing output
            # format and is kept unchanged.
            out_file.write("%s \n" % string[start:start + long_line])
    else:
        out_file.write("%s \n" % string)
def determine_long_line(in_file):
    """Guess the line-wrap width used in a fasta file.

    Reads the whole stream and looks for the first sequence whose body
    spans at least two non-blank lines; the first of those lines is a full
    wrapped line, so its length (surrounding whitespace stripped) is
    returned. Returns 70 when no sequence is wrapped over several lines.
    """
    sequences = in_file.read().split('>')
    # sequences[0] is whatever precedes the first ">" and holds no record.
    for sequence in sequences[1:]:
        # Drop the header line, then ignore blank lines: the newline that
        # terminates a record would otherwise be counted as a second body
        # line and make a single-line sequence look wrapped.
        lines = [line.strip() for line in sequence.split('\n')[1:]]
        lines = [line for line in lines if line]
        if len(lines) >= 2:
            return len(lines[0])
    return 70
43 # vim: set ts=4 sts=4 sw=4 et: