Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/cf6cdc3b7ec5/allpy/fasta.py
Дата изменения: Unknown
Дата индексирования: Sun Feb 3 23:41:27 2013
Кодировка:
allpy: cf6cdc3b7ec5 allpy/fasta.py

allpy

view allpy/fasta.py @ 286:cf6cdc3b7ec5

Fixed spaces
author Daniil Alexeyevsky <me.dendik@gmail.com>
date Wed, 15 Dec 2010 23:45:11 +0300
parents 4e6e85851133
children f8bd7c469fcf
line source
def parse_fasta(file):
    """Parse fasta file, remove spaces and newlines from sequence bodies.

    The whole file is read at once and split on ">". Within each record
    the first line, stripped of surrounding whitespace, is the header;
    everything after it, with spaces and line breaks removed, is the body.

    Chunks that contain nothing but whitespace (in particular the text
    before the first ">") are skipped, so they do not produce a bogus
    empty-header entry. If several records share a header, the last one
    wins.

    Return a dict of { sequence header: sequence body }.
    """
    sequences = {}
    for part in file.read().split(">"):
        if not part.strip():
            # Not a record: the (blank) text before the first ">" or
            # between two adjacent ">" characters.
            continue
        header, _, body = part.partition("\n")
        # "\r" is removed as well so that files with CRLF line endings,
        # read without newline translation, yield clean bodies.
        body = body.replace(" ", "").replace("\r", "").replace("\n", "")
        sequences[header.strip()] = body
    return sequences
def save_fasta(out_file, string, name, description='', long_line=70):
    """ Saves given string to out_file in fasta_format

    Writes a ">name description" header line followed by the sequence.

    Splits long lines to substrings of length=long_line
    To prevent this, set long_line=None

    Every written line ends with a space before the newline. A sequence
    whose length is an exact multiple of long_line does not get an extra
    blank line appended.
    """
    out_file.write(">%(name)s %(description)s \n" % {'name': name, 'description': description})
    if long_line:
        # Iterate over chunk start offsets. max(..., 1) keeps a single
        # (empty) sequence line for an empty string, matching what the
        # unwrapped branch below writes.
        for start in range(0, max(len(string), 1), long_line):
            out_file.write("%s \n" % string[start:start + long_line])
    else:
        out_file.write("%s \n" % string)
def determine_long_line(in_file):
    """ Guesses the line width used to wrap sequences in a fasta file

    Reads the whole file and looks for the first sequence whose body
    spans at least two non-blank lines; the length of its first line
    (surrounding whitespace stripped) is returned.

    Blank lines are ignored, so a single-line sequence followed by a
    newline is not mistaken for a wrapped one.

    Returns 70 if no sequence is wrapped.
    """
    for record in in_file.read().split('>')[1:]:
        # Drop the header line, then keep only lines with actual content.
        stripped = [line.strip() for line in record.split('\n')[1:]]
        body_lines = [line for line in stripped if line]
        if len(body_lines) >= 2:
            return len(body_lines[0])
    return 70
36 # vim: set ts=4 sts=4 sw=4 et: