allpy: 217d83a617c3 lib/project.py

allpy

view lib/project.py @ 73:217d83a617c3

In project.py method "get_from_fasta" renamed "from_fasta"

author	Boris Burkov <BurkovBA@gmail.com>
date	Wed, 22 Sep 2010 18:31:56 +0400
parents	bbf3a797cc67
children	6cd288019400

line source

1 #!/usr/bin/python

3 """

4 "I will not use abbrev."

5 "I will always finish what I st"

6 - Bart Simpson

8 """

10 import sequence

11 import monomer

class Project(object):
    """A set of sequences together with their alignment.

    Mandatory data:

    * sequences -- list of Sequence objects. Sequences don't contain gaps
      - see sequence.py module
    * alignment -- dict
      {<Sequence object>: [<Monomer object>, None, <Monomer object>]}
      keys are the Sequence objects, values are the lists, which
      contain monomers of those sequences or None for gaps in the
      corresponding sequence of alignment
    """

    def __init__(self, *args):
        """Overloaded constructor.

        Project() -> new empty Project
        Project(sequences, alignment) -> new Project with sequences and
            alignment initialized from arguments (any further positional
            arguments are ignored)
        Project(fasta_file) -> new Project, read alignment and sequences
            from fasta file (an object with a read() method)
        """
        if len(args) > 1:
            self.sequences = args[0]
            self.alignment = args[1]
        elif not args:
            self.sequences = []
            self.alignment = {}
        else:
            # The parser is called from_fasta; calling it by any other
            # name makes the single-argument constructor unusable.
            self.sequences, self.alignment = Project.from_fasta(args[0])

    @staticmethod
    def from_fasta(file):
        """Parse an aligned fasta file into sequences and an alignment.

        file -- object with a read() method returning the whole fasta text.

        Everything before the first ">" is ignored.  For each record the
        first line is the header: the text up to the first space is the
        sequence name, the rest (if any) is the description.  The
        remaining lines are concatenated with all whitespace removed;
        the characters "-", "." and "~" are treated as gaps.

        Returns a tuple (sequences, alignment):

        * sequences -- list of sequence.Sequence objects built from the
          non-gap characters (description is None when the header has none)
        * alignment -- dict mapping every Sequence to a list with one item
          per alignment column: the Monomer object, or None for a gap

        >>> import project
        >>> sequences,alignment=project.Project.from_fasta(open("test.fasta"))
        """
        import re

        # Compiled once instead of being looked up for every line.
        whitespace = re.compile(r"\s")
        gap_characters = "-.~"

        sequences = []
        alignment = {}

        content = file.read()
        # Ignore everything before the first ">".
        for raw in content.split(">")[1:]:
            # strip() cuts "\r" and surrounding whitespace from each line.
            lines = [piece.strip() for piece in raw.split("\n")]

            header = lines[0]
            if " " in header:
                name, description = header.split(" ", 1)
            else:
                # Reset explicitly for every record: otherwise a record
                # without a description would inherit the description of
                # an earlier record.
                name, description = header, None

            string = "".join(whitespace.sub("", piece) for piece in lines[1:])

            monomers = []  # Monomer objects of the gapless sequence
            alignment_list = []  # the respective list in alignment dict
            for current_monomer in string:
                if current_monomer in gap_characters:
                    alignment_list.append(None)
                else:
                    monomers.append(monomer.Monomer(current_monomer))
                    alignment_list.append(monomers[-1])

            current_sequence = sequence.Sequence(name, description, monomers)
            sequences.append(current_sequence)
            alignment[current_sequence] = alignment_list
        return sequences, alignment