allpy

diff allpy/block.py @ 261:d60628e29b24
Moved contents of block.py to base.py without any changes.
author: Daniil Alexeyevsky <me.dendik@gmail.com>
date: Tue, 14 Dec 2010 21:02:49 +0300
parents: 73f9779491ef
     1.1 --- a/allpy/block.py	Tue Dec 14 20:51:53 2010 +0300
     1.2 +++ b/allpy/block.py	Tue Dec 14 21:02:49 2010 +0300
     1.3 @@ -1,172 +1,2 @@
     1.4  #!usr/bin/python
     1.5  
     1.6 -import sys
     1.7 -
     1.8 -import alignment
     1.9 -import sequence
    1.10 -import monomer
    1.11 -import config
    1.12 -from graph import Graph
    1.13 -from Bio.PDB import Superimposer
    1.14 -from tempfile import NamedTemporaryFile
    1.15 -import os
    1.16 -from fasta import save_fasta
    1.17 -
    1.18 -class Block(object):
    1.19 -    """ Block of alignment
    1.20 -    
    1.21 -    Mandatory data:
    1.22 -    *   self.alignment -- alignment object, which the block belongs to
    1.23 -    *   self.sequences - set of sequence objects that contain monomers
    1.24 -        and/or gaps, that constitute the block
    1.25 -    *   self.positions -- list of positions of the alignment.body that
    1.26 -        are included in the block; position[i+1] is always to the right from position[i]
    1.27 -    
    1.28 -    Don't change self.sequences -- it may be a link to other block.sequences
    1.29 -    
    1.30 -    How to create a new block:
    1.31 -    >>> import alignment
    1.32 -    >>> import block
    1.33 -    >>> proj = alignment.Alignment(open("test.fasta"))
    1.34 -    >>> block1 = block.Block(proj)
    1.35 -    """
    1.36 -    
    1.37 -    def __init__(self, alignment, sequences=None, positions=None):
    1.38 -        """ Builds new block from alignment
    1.39 -        
    1.40 -        if sequences==None, all sequences are used
    1.41 -        if positions==None, all positions are used
    1.42 -        """
    1.43 -        if sequences == None:
    1.44 -            sequences = set(alignment.sequences) # copy
    1.45 -        if positions == None:
    1.46 -            positions = range(len(alignment))
    1.47 -        self.alignment = alignment
    1.48 -        self.sequences = sequences
    1.49 -        self.positions = positions
    1.50 -    
    1.51 -    def save_fasta(self, out_file, long_line=70, gap='-'):
    1.52 -        """ Saves alignment to given file in fasta-format 
    1.53 -        
    1.54 -        No changes in the names, descriptions or order of the sequences
    1.55 -        are made.
    1.56 -        """
    1.57 -        for sequence in self.sequences:
    1.58 -            alignment_monomers = self.alignment.body[sequence]
    1.59 -            block_monomers = [alignment_monomers[i] for i in self.positions]
    1.60 -            string = ''.join([m.type.code1 if m else '-' for m in block_monomers])
    1.61 -            save_fasta(out_file, string, sequence.name, sequence.description, long_line)
    1.62 -    
    1.63 -    def geometrical_cores(self, max_delta=config.delta, 
    1.64 -    timeout=config.timeout, minsize=config.minsize, 
    1.65 -    ac_new_atoms=config.ac_new_atoms,
    1.66 -    ac_count=config.ac_count):
    1.67 -        """ Returns length-sorted list of blocks, representing GCs
    1.68 -        
    1.69 -        max_delta -- threshold of distance spreading
    1.70 -        timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
    1.71 -        minsize -- min size of each core
    1.72 -        ac_new_atoms -- min part or new atoms in new alternative core
    1.73 -            current GC is compared with each of already selected GCs
    1.74 -            if difference is less then ac_new_atoms, current GC is skipped
    1.75 -            difference = part of new atoms in current core
    1.76 -        ac_count -- max number of cores (including main core)
    1.77 -            -1 means infinity
    1.78 -        If more than one pdb chain for some sequence provided, consider all of them
    1.79 -        cost is calculated as 1 / (delta + 1) 
    1.80 -            delta in [0, +inf) => cost in (0, 1]
    1.81 -        """
    1.82 -        nodes = self.positions
    1.83 -        lines = {}
    1.84 -        for i in self.positions:
    1.85 -            for j in self.positions:
    1.86 -                if i < j:
    1.87 -                    distances = []
    1.88 -                    for sequence in self.sequences:
    1.89 -                        for chain in sequence.pdb_chains:
    1.90 -                            m1 = self.alignment.body[sequence][i]
    1.91 -                            m2 = self.alignment.body[sequence][j]
    1.92 -                            if m1 and m2:
    1.93 -                                r1 = sequence.pdb_residues[chain][m1]
    1.94 -                                r2 = sequence.pdb_residues[chain][m2]
    1.95 -                                ca1 = r1['CA']
    1.96 -                                ca2 = r2['CA']
    1.97 -                                d = ca1 - ca2 # Bio.PDB feature
    1.98 -                                distances.append(d)
    1.99 -                    if len(distances) >= 2:
   1.100 -                        delta = max(distances) - min(distances)
   1.101 -                        if delta <= max_delta:
   1.102 -                            lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta)
   1.103 -        graph = Graph(nodes, lines)
   1.104 -        cliques = graph.cliques(timeout=timeout, minsize=minsize)
   1.105 -        GCs = []
   1.106 -        for clique in cliques:
   1.107 -            for GC in GCs:
   1.108 -                if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):
   1.109 -                    break
   1.110 -            else:
   1.111 -                GCs.append(Block(self.alignment, self.sequences, clique))
   1.112 -                if ac_count != -1 and len(GCs) >= ac_count:
   1.113 -                    break
   1.114 -        return GCs
   1.115 -    
   1.116 -    def xstring(self, x='X', gap='-'):
   1.117 -        """ Returns string consisting of gap chars and chars x at self.positions
   1.118 -        
   1.119 -        Length of returning string = length of alignment
   1.120 -        """
   1.121 -        monomers = [False] * len(self.alignment)
   1.122 -        for i in self.positions:
   1.123 -            monomers[i] = True
   1.124 -        return ''.join([x if m else gap for m in monomers])
   1.125 -    
   1.126 -    def save_xstring(self, out_file, name, description='', x='X', gap='-', long_line=70):
   1.127 -        """ Save xstring and name in fasta format """
   1.128 -        save_fasta(out_file, self.xstring(x=x, gap=gap), name, description, long_line)
   1.129 -    
   1.130 -    def monomers(self, sequence):
   1.131 -        """ Iterates monomers of this sequence from this block """
   1.132 -        alignment_sequence = self.alignment.body[sequence]
   1.133 -        return (alignment_sequence[i] for i in self.positions)
   1.134 -    
   1.135 -    def ca_atoms(self, sequence, pdb_chain):
   1.136 -        """ Iterates Ca-atom of monomers of this sequence from this block  """
   1.137 -        return (sequence.pdb_residues[pdb_chain][monomer] for monomer in self.monomers())
   1.138 -    
   1.139 -    def sequences_chains(self):
   1.140 -        """ Iterates pairs (sequence, chain) """
   1.141 -        for sequence in self.alignment.sequences:
   1.142 -            if sequence in self.sequences:
   1.143 -                for chain in sequence.pdb_chains:
   1.144 -                    yield (sequence, chain)
   1.145 -    
   1.146 -    def superimpose(self):
   1.147 -        """ Superimpose all pdb_chains in this block """
   1.148 -        sequences_chains = list(self.sequences_chains())
   1.149 -        if len(sequences_chains) >= 1:
   1.150 -            sup = Superimposer()
   1.151 -            fixed_sequence, fixed_chain = sequences_chains.pop()
   1.152 -            fixed_atoms = self.ca_atoms(fixed_sequence, fixed_chain)
   1.153 -            for sequence, chain in sequences_chains:
   1.154 -                moving_atoms =  self.ca_atoms(sequence, chain)
   1.155 -                sup.set_atoms(fixed_atoms, moving_atoms)
   1.156 -                # Apply rotation/translation to the moving atoms
   1.157 -                sup.apply(moving_atoms)
   1.158 -    
   1.159 -    def pdb_save(self, out_file):
   1.160 -        """ Save all sequences 
   1.161 -        
   1.162 -        Returns {(sequence, chain): CHAIN}
   1.163 -        CHAIN is chain letter in new file
   1.164 -        """
   1.165 -        tmp_file = NamedTemporaryFile(delete=False)
   1.166 -        tmp_file.close()
   1.167 -        
   1.168 -        for sequence, chain in self.sequences_chains():
   1.169 -            sequence.pdb_save(tmp_file.name, chain)
   1.170 -            # TODO: read from tmp_file.name
   1.171 -            # change CHAIN
   1.172 -            # add to out_file
   1.173 -        
   1.174 -        os.unlink(NamedTemporaryFile)
   1.175 -
author	Daniil Alexeyevsky <me.dendik@gmail.com>
date	Tue, 14 Dec 2010 21:02:49 +0300
parents	73f9779491ef
children