Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/217d83a617c3/lib/project.py
Дата изменения: Unknown
Дата индексирования: Sun Feb 3 23:23:21 2013
Кодировка:
allpy: 217d83a617c3 lib/project.py

allpy

view lib/project.py @ 73:217d83a617c3

In project.py method "get_from_fasta" renamed "from_fasta"
author Boris Burkov <BurkovBA@gmail.com>
date Wed, 22 Sep 2010 18:31:56 +0400
parents bbf3a797cc67
children 6cd288019400
line source
1 #!/usr/bin/python
3 """
4 "I will not use abbrev."
5 "I will always finish what I st"
6 - Bart Simpson
8 """
10 import sequence
11 import monomer
class Project(object):
    """Set of sequences together with their alignment.

    Mandatory data:

    * sequences -- list of Sequence objects. Sequences don't contain gaps
      - see sequence.py module
    * alignment -- dict
      {<Sequence object>: [<Monomer object>, None, <Monomer object>]}
      keys are the Sequence objects, values are the lists, which
      contain monomers of those sequences or None for gaps in the
      corresponding sequence of the alignment
    """

    # Characters treated as a gap when reading aligned sequences.
    _GAP_CHARACTERS = "-.~"

    def __init__(self, *args):
        """Overloaded constructor.

        Project() -> new empty Project
        Project(sequences, alignment) -> new Project with sequences and
            alignment initialized from arguments
        Project(fasta_file) -> new Project, read alignment and sequences
            from fasta file (an object with a read() method)

        Positional arguments beyond the second one are ignored.
        """
        if not args:
            self.sequences = []
            self.alignment = {}
        elif len(args) == 1:
            # The parser is named from_fasta; the former get_from_fasta
            # name no longer exists on the class.
            self.sequences, self.alignment = Project.from_fasta(args[0])
        else:
            self.sequences = args[0]
            self.alignment = args[1]

    @staticmethod
    def from_fasta(file):
        """Read sequences and their alignment from an open fasta file.

        Everything before the first ">" is ignored. The first line of
        each record is split on its first space into a name and an
        optional description; the remaining lines, with all whitespace
        removed, form the aligned sequence. The characters "-", "." and
        "~" are gaps: they produce None in the alignment list and no
        Monomer in the sequence.

        Arguments:
        * file -- object with a read() method returning the fasta text

        Returns a tuple (sequences, alignment):
        * sequences -- list of sequence.Sequence objects, in file order,
          each built as Sequence(name, description, monomers);
          description is None when the header line has none
        * alignment -- dict mapping each of those Sequence objects to a
          list of its Monomer objects with None at gap positions

        >>> import project
        >>> sequences,alignment=project.Project.from_fasta(open("test.fasta"))
        """
        import re  # kept local, as in the original method

        sequences = []
        alignment = {}

        # Ignore everything before the first ">".
        records = file.read().split(">")[1:]
        for record in records:
            # Strip every line to cut "\r" and surrounding whitespace.
            lines = [line.strip() for line in record.split("\n")]

            # The description is decided per record, so a record without
            # one never inherits the description of an earlier record.
            name, separator, rest = lines[0].partition(" ")
            description = rest if separator else None

            residues = "".join(re.sub(r"\s", "", line) for line in lines[1:])

            monomers = []  # gapless Monomer objects of this sequence
            alignment_list = []  # the same monomers, with None for gaps
            for letter in residues:
                if letter in Project._GAP_CHARACTERS:
                    alignment_list.append(None)
                else:
                    current_monomer = monomer.Monomer(letter)
                    monomers.append(current_monomer)
                    alignment_list.append(current_monomer)

            current_sequence = sequence.Sequence(name, description, monomers)
            sequences.append(current_sequence)
            # NOTE(review): assumes Sequence objects are hashable and that
            # distinct records do not compare equal -- confirm in sequence.py
            alignment[current_sequence] = alignment_list
        return sequences, alignment