allpy

changeset 309:0c3c1856113a
Moved base.Block.geometrical_cores to pdb.Block
author: Daniil Alexeyevsky <me.dendik@gmail.com>
date: Thu, 16 Dec 2010 20:47:09 +0300
parents: 4d610c277281
children: 9ad22a1848fc
files: allpy/base.py allpy/pdb.py
diffstat: 2 files changed, 50 insertions(+), 55 deletions(-) [+]
[-]

allpy/base.py 55
allpy/pdb.py 50
     1.1 --- a/allpy/base.py	Thu Dec 16 20:45:57 2010 +0300
     1.2 +++ b/allpy/base.py	Thu Dec 16 20:47:09 2010 +0300
     1.3 @@ -380,61 +380,6 @@
     1.4              string = ''.join([m.type.code1 if m else '-' for m in block_monomers])
     1.5              save_fasta(out_file, string, sequence.name, sequence.description, long_line)
     1.6  
     1.7 -    def geometrical_cores(self, max_delta=config.delta,
     1.8 -    timeout=config.timeout, minsize=config.minsize,
     1.9 -    ac_new_atoms=config.ac_new_atoms,
    1.10 -    ac_count=config.ac_count):
    1.11 -        """ Returns length-sorted list of blocks, representing GCs
    1.12 -
    1.13 -        * max_delta -- threshold of distance spreading
    1.14 -        * timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
    1.15 -        * minsize -- min size of each core
    1.16 -        * ac_new_atoms -- min part or new atoms in new alternative core
    1.17 -            current GC is compared with each of already selected GCs if
    1.18 -            difference is less then ac_new_atoms, current GC is skipped
    1.19 -            difference = part of new atoms in current core
    1.20 -        * ac_count -- max number of cores (including main core)
    1.21 -            -1 means infinity
    1.22 -
    1.23 -        If more than one pdb chain for some sequence provided, consider all of them
    1.24 -        cost is calculated as 1 / (delta + 1)
    1.25 -
    1.26 -            delta in [0, +inf) => cost in (0, 1]
    1.27 -        """
    1.28 -        nodes = self.positions
    1.29 -        lines = {}
    1.30 -        for i in self.positions:
    1.31 -            for j in self.positions:
    1.32 -                if i < j:
    1.33 -                    distances = []
    1.34 -                    for sequence in self.sequences:
    1.35 -                        for chain in sequence.pdb_chains:
    1.36 -                            m1 = self.alignment.body[sequence][i]
    1.37 -                            m2 = self.alignment.body[sequence][j]
    1.38 -                            if m1 and m2:
    1.39 -                                r1 = sequence.pdb_residues[chain][m1]
    1.40 -                                r2 = sequence.pdb_residues[chain][m2]
    1.41 -                                ca1 = r1['CA']
    1.42 -                                ca2 = r2['CA']
    1.43 -                                d = ca1 - ca2 # Bio.PDB feature
    1.44 -                                distances.append(d)
    1.45 -                    if len(distances) >= 2:
    1.46 -                        delta = max(distances) - min(distances)
    1.47 -                        if delta <= max_delta:
    1.48 -                            lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta)
    1.49 -        graph = Graph(nodes, lines)
    1.50 -        cliques = graph.cliques(timeout=timeout, minsize=minsize)
    1.51 -        GCs = []
    1.52 -        for clique in cliques:
    1.53 -            for GC in GCs:
    1.54 -                if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):
    1.55 -                    break
    1.56 -            else:
    1.57 -                GCs.append(Block(self.alignment, self.sequences, clique))
    1.58 -                if ac_count != -1 and len(GCs) >= ac_count:
    1.59 -                    break
    1.60 -        return GCs
    1.61 -
    1.62      def xstring(self, x='X', gap='-'):
    1.63          """ Returns string consisting of gap chars and chars x at self.positions
    1.64  

     2.1 --- a/allpy/pdb.py	Thu Dec 16 20:45:57 2010 +0300
     2.2 +++ b/allpy/pdb.py	Thu Dec 16 20:47:09 2010 +0300
     2.3 @@ -230,6 +230,56 @@
     2.4  
     2.5  class Block(base.Block):
     2.6  
     2.7 +    def geometrical_cores(self, max_delta=config.delta,
     2.8 +    timeout=config.timeout, minsize=config.minsize,
     2.9 +    ac_new_atoms=config.ac_new_atoms,
    2.10 +    ac_count=config.ac_count):
    2.11 +        """ Returns length-sorted list of blocks, representing GCs
    2.12 +
    2.13 +        * max_delta -- threshold of distance spreading
    2.14 +        * timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
    2.15 +        * minsize -- min size of each core
    2.16 +        * ac_new_atoms -- min part or new atoms in new alternative core
    2.17 +            current GC is compared with each of already selected GCs if
    2.18 +            difference is less then ac_new_atoms, current GC is skipped
    2.19 +            difference = part of new atoms in current core
    2.20 +        * ac_count -- max number of cores (including main core)
    2.21 +            -1 means infinity
    2.22 +
    2.23 +        If more than one pdb chain for some sequence provided, consider all of them
    2.24 +        cost is calculated as 1 / (delta + 1)
    2.25 +
    2.26 +            delta in [0, +inf) => cost in (0, 1]
    2.27 +        """
    2.28 +        nodes = self.positions
    2.29 +        lines = {}
    2.30 +        for i in self.positions:
    2.31 +            for j in self.positions:
    2.32 +                if i < j:
    2.33 +                    distances = []
    2.34 +                    for sequence in self.sequences:
    2.35 +                        for chain in sequence.pdb_chains:
    2.36 +                            m1 = self.alignment.body[sequence][i]
    2.37 +                            m2 = self.alignment.body[sequence][j]
    2.38 +                            if m1 and m2:
    2.39 +                                r1 = sequence.pdb_residues[chain][m1]
    2.40 +                                r2 = sequence.pdb_residues[chain][m2]
    2.41 +                                ca1 = r1['CA']
    2.42 +                                ca2 = r2['CA']
    2.43 +                                d = ca1 - ca2 # Bio.PDB feature
    2.44 +                                distances.append(d)
    2.45 +                    if len(distances) >= 2:
    2.46 +                        delta = max(distances) - min(distances)
    2.47 +                        if delta <= max_delta:
    2.48 +                            lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta)
    2.49 +        graph = Graph(nodes, lines)
    2.50 +        cliques = graph.cliques(timeout=timeout, minsize=minsize)
    2.51 +        GCs = []
    2.52 +        for clique in cliques:
    2.53 +            for GC in GCs:
    2.54 +                if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):
    2.55 +                    break
    2.56 +
    2.57      def ca_atoms(self, sequence, pdb_chain):
    2.58          """ Iterates Ca-atom of monomers of this sequence from this block  """
    2.59          return (sequence.pdb_residues[pdb_chain][monomer] for monomer in self.monomers())