allpy
changeset 0:757f2a1f8732
Initial commit with the lib modules
author | Boris Burkov <BurkovBA@gmail.com> |
---|---|
date | Fri, 21 May 2010 20:30:40 +0400 |
parents | |
children | bbf3a797cc67 |
files | lib/block.py lib/monomer.py lib/project.py lib/sequence.py |
diffstat | 4 files changed, 161 insertions(+), 0 deletions(-) [+] |
line diff
#!/usr/bin/python
# lib/block.py

import sys

import configure
import project
import sequence
import monomer
import rooted_tree
from Lgamma import Lgamma


class Block(object):
    """A selection of sequences and alignment positions of a project.

    Mandatory data:
    *   self.project -- project object, which the block belongs to
    *   self.sequences -- list of sequence objects, that contain monomers
        and/or gaps, that constitute the block
    *   self.positions -- positions of the project.alignment that are
        included in the block

    How to create a new block:
    >>> import project
    >>> import block
    >>> proj = project.Project(open("test.fasta"))
    >>> block1 = block.Block(proj, proj.sequences, range(len(proj.alignment[proj.sequences[0]])))

    """

    def __init__(self, project, sequences, positions):
        self.project = project
        self.sequences = sequences
        self.positions = positions

    def to_fasta(self, file, line_width=60):
        """Write the block as an alignment in fasta-format into the file.

        No changes in the names, descriptions or order of the sequences
        are made.  A gap (None in the alignment) is written as "-".

        Arguments:
        *   file -- object with a write() method that accepts strings
        *   line_width -- maximal number of alignment positions written
            on one line (60 by default)

        Raises ValueError if line_width is smaller than 1.

        """
        if line_width < 1:
            raise ValueError("line_width must be a positive integer")

        # The loop variable is deliberately not called "sequence", so that
        # it does not hide the imported sequence module.
        for block_sequence in self.sequences:
            # A sequence without description gets a bare ">name" header
            # instead of the literal text "None".
            if block_sequence.description is None:
                header = ">%s\n" % (block_sequence.name,)
            else:
                header = ">%s %s\n" % (block_sequence.name,
                                       block_sequence.description)
            file.write(header)

            row = self.project.alignment[block_sequence]
            codes = []
            for position in self.positions:
                item = row[position]
                if item is None:
                    codes.append("-")
                else:
                    codes.append(item.code)

            # Wrap by number of positions (not by number of characters),
            # so a line break is inserted after every line_width items.
            lines = [
                "".join(codes[start:start + line_width])
                for start in range(0, len(codes), line_width)
            ]
            # The sequence is always terminated by a newline, even when
            # the block has no positions at all.
            file.write("\n".join(lines) + "\n")
#!/usr/bin/python
# lib/monomer.py


class Monomer(object):
    """A monomer, identified by its code.

    Data:
    *   self.code -- code of the monomer, stored exactly as it was passed
        to the constructor

    """

    def __init__(self, code):
        self.code = code

    def __repr__(self):
        # Show the code, so that lists of monomers are readable when
        # printed or inspected in a debugger.
        return "%s(%r)" % (type(self).__name__, self.code)
#!/usr/bin/python
# lib/project.py

"""
    "I will not use abbrev."
    "I will always finish what I st"
    - Bart Simpson

"""

import re

import configure
import sequence
import monomer


# Characters of a fasta sequence line that denote a gap.
_GAP_CHARACTERS = frozenset("-.~")

# Any whitespace character; removed from the sequence lines.
_WHITESPACE = re.compile(r"\s")


class Project(object):
    """
    Mandatory data:
    *   sequences -- list of Sequence objects. Sequences don't contain gaps
        - see sequence.py module
    *   alignment -- dict
        {<Sequence object>:[<Monomer object>,None,<Monomer object>]}
        keys are the Sequence objects, values are the lists, which
        contain monomers of those sequences or None for gaps in the
        corresponding sequence of alignment

    """

    def __init__(self, *args):
        """overloaded constructor

        Project() -> new empty Project
        Project(sequences, alignment) -> new Project with sequences and
            alignment initialized from arguments
        Project(fasta_file) -> new Project, read alignment and sequences
            from fasta file

        Arguments after the second one are ignored.

        """
        if not args:
            self.sequences = []
            self.alignment = {}
        elif len(args) == 1:
            self.sequences, self.alignment = Project.get_from_fasta(args[0])
        else:
            self.sequences = args[0]
            self.alignment = args[1]

    @staticmethod
    def get_from_fasta(file):
        """Read sequences and their alignment from a fasta file.

        file is an object with a read() method that returns the whole
        text.  A record starts at a line that begins with ">" (leading
        whitespace is allowed); everything before the first such line is
        ignored.  The header is split at the first space into the name
        and the description; the description is None if there is none.
        The characters "-", "." and "~" are gaps: they are stored as None
        in the alignment and are left out of the monomers of the sequence.

        Returns the tuple (sequences, alignment), see the class docstring.

        Raises ValueError if a record has an empty header.

        >>> import project
        >>> sequences,alignment=project.Project.get_from_fasta(open("test.fasta"))
        """
        # Split the text into records: (header, list of sequence lines).
        # Only a ">" at the beginning of a line starts a record, so a ">"
        # inside a description does not break the record apart.
        records = []
        for line in file.read().split("\n"):
            line = line.strip()  # cut "\r" and surrounding whitespace
            if line.startswith(">"):
                records.append((line[1:].strip(), []))
            elif records:
                records[-1][1].append(line)

        sequences = []
        alignment = {}
        for header, pieces in records:
            name, _, description = header.partition(" ")
            if not name:
                raise ValueError("Wrong name of sequence in fasta file")
            if not description:
                # Set for every record, so the description of a previous
                # record is never reused for a record without one.
                description = None

            string = _WHITESPACE.sub("", "".join(pieces))

            monomers = []  # Monomer objects of the sequence, without gaps
            alignment_list = []  # the respective list in alignment dict
            for code in string:
                if code in _GAP_CHARACTERS:
                    alignment_list.append(None)
                else:
                    current_monomer = monomer.Monomer(code)
                    monomers.append(current_monomer)
                    alignment_list.append(current_monomer)

            new_sequence = sequence.Sequence(name, description, monomers)
            sequences.append(new_sequence)
            alignment[new_sequence] = alignment_list
        return sequences, alignment
#!/usr/bin/python
# lib/sequence.py
import monomer


class Sequence(object):
    """A named sequence of monomers.

    Data:
    *   self.name -- name of the sequence
    *   self.description -- description of the sequence, or None
    *   self.monomers -- the monomers of the sequence, stored exactly as
        they were passed to the constructor

    """

    def __init__(self, name, description, monomers):
        self.name = name
        self.description = description
        self.monomers = monomers

    def __repr__(self):
        # Only the name is shown: the list of monomers may be long.
        return "%s(%r)" % (type(self).__name__, self.name)