Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/5417b9d6b63e/allpy/fasta.py
Дата изменения: Unknown
Дата индексирования: Sun Feb 3 23:35:43 2013
Кодировка:
allpy: 5417b9d6b63e allpy/fasta.py

allpy

view allpy/fasta.py @ 312:5417b9d6b63e

Added base.Block.flush_left
author Daniil Alexeyevsky <me.dendik@gmail.com>
date Thu, 16 Dec 2010 22:30:32 +0300
parents 484ca4b51ad6
children 316122b6b70e
line source
def parse_fasta(file):
    """Parse fasta file, remove all whitespace from sequence bodies.

    The whole file is read at once and split on ">" characters; each
    resulting chunk is a header line followed by the sequence body.  The
    header is split at its first space into a name and a description.

    Chunks that contain nothing but whitespace are skipped, so the empty
    text in front of the first ">" (or an entirely empty file) does not
    produce a bogus ('', '') record.

    Note: because the text is split on every ">", a ">" character inside
    a header line is still treated as the start of a new record.

    Arguments:
    file -- object with a read() method returning the fasta text

    Return a dict of { (name, description) : sequence_body }.
    """
    sequences = {}
    for part in file.read().split(">"):
        if not part.strip():
            # Nothing here: text before the first ">" or an empty record.
            continue
        header, _, body = part.partition("\n")
        name, _, description = header.strip().partition(" ")
        # split() without arguments cuts on any whitespace run, so this
        # drops spaces and newlines as well as tabs and the carriage
        # returns left over from Windows line endings.
        body = "".join(body.split())
        sequences[name.strip(), description.strip()] = body
    return sequences
def save_fasta(out_file, string, name, description='', long_line=70):
    """Save given string to out_file in fasta format.

    Writes a ">name description" header line (the description is left
    out when empty) followed by the sequence body.

    The body is split into lines of at most long_line characters.
    To prevent splitting, set long_line=None (or any false value); the
    whole string is then written as a single line.

    Every body line is written with a single space before the newline;
    this matches the format this function has always produced.

    Arguments:
    out_file -- object with a write() method accepting text
    string -- sequence body to write
    name -- sequence name, first word of the header
    description -- optional rest of the header
    long_line -- maximum body line length, or None for no wrapping
    """
    header = " ".join(part for part in (name, description) if part)
    out_file.write(">%s\n" % header)
    if long_line:
        # Step through the string by long_line so that a length that is
        # an exact multiple of long_line does not produce a trailing
        # empty line.
        for start in range(0, len(string), long_line):
            out_file.write("%s \n" % string[start:start + long_line])
    else:
        out_file.write("%s \n" % string)
def determine_long_line(in_file):
    """ Guess the line length used for sequence bodies in a fasta file.

    Reads all of in_file and walks the records (the text following each
    ">") in order.  The text after a record's header line is split on
    newlines; the first record that yields at least two pieces decides
    the answer: the length of its first body line, with surrounding
    whitespace stripped.

    A trailing newline produces an empty final piece which is counted,
    so a one-line body that ends with a newline also qualifies.

    Returns 70 when no record qualifies.
    """
    records = in_file.read().split('>')[1:]
    for record in records:
        body_lines = record.split('\n')[1:]
        if len(body_lines) < 2:
            continue
        first_line = body_lines[0]
        return len(first_line.strip())
    return 70
40 # vim: set ts=4 sts=4 sw=4 et: