allpy

changeset 0:757f2a1f8732
Initial commit with the lib modules
author: Boris Burkov <BurkovBA@gmail.com>
date: Fri, 21 May 2010 20:30:40 +0400
children: bbf3a797cc67
files: lib/block.py lib/monomer.py lib/project.py lib/sequence.py
diffstat: 4 files changed, 161 insertions(+), 0 deletions(-) [+]
[-]

lib/block.py 57

lib/monomer.py 5

lib/project.py 90

lib/sequence.py 9
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/lib/block.py	Fri May 21 20:30:40 2010 +0400
     1.3 @@ -0,0 +1,57 @@
     1.4 +#!/usr/bin/python
     1.5 +
     1.6 +import sys
     1.7 +
     1.8 +import configure
     1.9 +import project
    1.10 +import sequence
    1.11 +import monomer
    1.12 +import rooted_tree
    1.13 +from Lgamma import Lgamma
    1.14 +
    1.15 +class Block(object):
    1.16 +    """A selection of sequences and alignment positions of a project.
    1.17 +
    1.18 +    Mandatory data:
    1.19 +    *   self.project -- project object, which the block belongs to
    1.20 +    *   self.sequences -- list of sequence objects, that contain monomers
    1.21 +        and/or gaps, that constitute the block
    1.22 +    *   self.positions -- positions of the project.alignment that are
    1.23 +        included in the block
    1.24 +
    1.25 +    How to create a new block:
    1.26 +    >>> import project
    1.27 +    >>> import block
    1.28 +    >>> proj = project.Project(open("test.fasta"))
    1.29 +    >>> block1 = block.Block(proj, proj.sequences, range(len(proj.alignment[proj.sequences[0]])))
    1.30 +    """
    1.31 +
    1.32 +    def __init__(self,project,sequences,positions):
    1.33 +        """Store the project, the sequences and the positions as given."""
    1.34 +        self.project=project
    1.35 +        self.sequences=sequences
    1.36 +        self.positions=positions
    1.37 +
    1.38 +    def to_fasta(self,file):
    1.39 +        """Write the block as an alignment in fasta-format into the file.
    1.40 +
    1.41 +        Names and order of the sequences are kept. Gaps (None in the
    1.42 +        alignment) are written as "-" and rows are wrapped after 60
    1.43 +        symbols. A sequence whose description is None gets a header
    1.44 +        with the name only.
    1.45 +        """
    1.46 +        for sequence in self.sequences:
    1.47 +            if sequence.description is None:
    1.48 +                file.write(">%s\n"%(sequence.name,))
    1.49 +            else:
    1.50 +                file.write(">%s %s\n"%(sequence.name,sequence.description))
    1.51 +            symbols=[]
    1.52 +            for position in self.positions:
    1.53 +                item=self.project.alignment[sequence][position]
    1.54 +                symbols.append("-" if item is None else item.code)
    1.55 +            # max(..., 1): an empty block still gets one (empty) row
    1.56 +            for start in range(0,max(len(symbols),1),60):
    1.57 +                file.write("".join(symbols[start:start+60])+"\n")
    1.58 +
    1.59 +
    1.60 +

     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/lib/monomer.py	Fri May 21 20:30:40 2010 +0400
     2.3 @@ -0,0 +1,5 @@
     2.4 +#!/usr/bin/python
     2.5 +
     2.6 +class Monomer(object):  # holder of one symbol of a sequence
     2.7 +    def __init__(self, code):
     2.8 +        self.code = code  # the symbol is stored as given, unchecked

     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/lib/project.py	Fri May 21 20:30:40 2010 +0400
     3.3 @@ -0,0 +1,90 @@
     3.4 +#!/usr/bin/python
     3.5 +
     3.6 +"""
     3.7 +    "I will not use abbrev."
     3.8 +    "I will always finish what I st"    
     3.9 +                                            - Bart Simpson
    3.10 +
    3.11 +"""
    3.12 +
    3.13 +import configure
    3.14 +import sequence
    3.15 +import monomer
    3.16 +
    3.17 +
    3.18 +class Project(object):
    3.19 +    """Sequences together with their alignment.
    3.20 +
    3.21 +    Mandatory data:
    3.22 +    *   sequences -- list of Sequence objects. Sequences don't contain gaps
    3.23 +         - see sequence.py module
    3.24 +    *   alignment -- dict
    3.25 +        {<Sequence object>:[<Monomer object>,None,<Monomer object>]}
    3.26 +        keys are the Sequence objects, values are the lists, which
    3.27 +        contain monomers of those sequences or None for gaps in the
    3.28 +        corresponding sequence of alignment
    3.29 +
    3.30 +    """
    3.31 +    def __init__(self, *args):
    3.32 +        """Overloaded constructor.
    3.33 +
    3.34 +        Project() -> new empty Project
    3.35 +        Project(sequences, alignment) -> new Project with sequences and
    3.36 +            alignment initialized from arguments
    3.37 +        Project(fasta_file) -> new Project, read alignment and sequences
    3.38 +            from an open fasta file, see get_from_fasta
    3.39 +
    3.40 +        """
    3.41 +        if len(args)>1:#arguments after the second one are ignored
    3.42 +            self.sequences=args[0]
    3.43 +            self.alignment=args[1]
    3.44 +        elif len(args)==0:
    3.45 +            self.sequences=[]
    3.46 +            self.alignment={}
    3.47 +        else:
    3.48 +            self.sequences,self.alignment=Project.get_from_fasta(args[0])
    3.49 +
    3.50 +    @staticmethod
    3.51 +    def get_from_fasta(file):
    3.52 +        """Read sequences and their alignment from an open fasta file.
    3.53 +
    3.54 +        file -- object with a read() method, which returns the fasta text
    3.55 +
    3.56 +        Everything before the first ">" is ignored. The first line of a
    3.57 +        record is split at the first space into name and description, a
    3.58 +        record without description gets None as description. The symbols
    3.59 +        "-", "." and "~" are gaps: they become None in the alignment and
    3.60 +        are not included into the monomers of the sequence.
    3.61 +
    3.62 +        Returns the tuple (sequences, alignment), see the class docstring.
    3.63 +
    3.64 +        >>> import project
    3.65 +        >>> sequences,alignment=project.Project.get_from_fasta(open("test.fasta"))
    3.66 +        """
    3.67 +        gap_symbols="-.~"
    3.68 +        sequences=[]
    3.69 +        alignment={}
    3.70 +
    3.71 +        for raw in file.read().split(">")[1:]:#ignore everything before the first >
    3.72 +            lines=[piece.strip() for piece in raw.split("\n")]#cut \r or whitespaces
    3.73 +            # the first line of a record holds the name and the description;
    3.74 +            # description is reset for every record, otherwise a record
    3.75 +            # without one would inherit it from the previous record
    3.76 +            name,description=lines[0],None
    3.77 +            if " " in name:
    3.78 +                name,description=name.split(" ",1)
    3.79 +            string="".join("".join(lines[1:]).split())#drop inner whitespace
    3.80 +            monomers=[]#Monomer objects of the sequence, gaps are skipped
    3.81 +            alignment_list=[]#the same Monomer objects and None for every gap
    3.82 +            for symbol in string:
    3.83 +                if symbol in gap_symbols:
    3.84 +                    alignment_list.append(None)
    3.85 +                else:
    3.86 +                    monomers.append(monomer.Monomer(symbol))
    3.87 +                    alignment_list.append(monomers[-1])
    3.88 +            sequences.append(sequence.Sequence(name,description,monomers))
    3.89 +            alignment[sequences[-1]]=alignment_list
    3.90 +
    3.91 +        return sequences,alignment
    3.92 +       
    3.93 +

     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/lib/sequence.py	Fri May 21 20:30:40 2010 +0400
     4.3 @@ -0,0 +1,9 @@
     4.4 +#!/usr/bin/python
     4.5 +import monomer 
     4.6 +
     4.7 +class Sequence(object):
     4.8 +    """Named sequence: a name, a description and a list of monomers."""
     4.9 +    def __init__(self, name, description, monomers):
    4.10 +        # plain attribute storage, the arguments are neither checked nor copied
    4.11 +        self.name, self.description, self.monomers = name, description, monomers
    4.12 +