allpy
changeset 309:0c3c1856113a
Moved base.Block.geometrical_cores to pdb.Block
author | Daniil Alexeyevsky <me.dendik@gmail.com> |
---|---|
date | Thu, 16 Dec 2010 20:47:09 +0300 |
parents | 4d610c277281 |
children | 9ad22a1848fc |
files | allpy/base.py allpy/pdb.py |
diffstat | 2 files changed, 50 insertions(+), 55 deletions(-) [+] |
line diff
1.1 --- a/allpy/base.py Thu Dec 16 20:45:57 2010 +0300 1.2 +++ b/allpy/base.py Thu Dec 16 20:47:09 2010 +0300 1.3 @@ -380,61 +380,6 @@ 1.4 string = ''.join([m.type.code1 if m else '-' for m in block_monomers]) 1.5 save_fasta(out_file, string, sequence.name, sequence.description, long_line) 1.6 1.7 - def geometrical_cores(self, max_delta=config.delta, 1.8 - timeout=config.timeout, minsize=config.minsize, 1.9 - ac_new_atoms=config.ac_new_atoms, 1.10 - ac_count=config.ac_count): 1.11 - """ Returns length-sorted list of blocks, representing GCs 1.12 - 1.13 - * max_delta -- threshold of distance spreading 1.14 - * timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm) 1.15 - * minsize -- min size of each core 1.16 - * ac_new_atoms -- min part or new atoms in new alternative core 1.17 - current GC is compared with each of already selected GCs if 1.18 - difference is less then ac_new_atoms, current GC is skipped 1.19 - difference = part of new atoms in current core 1.20 - * ac_count -- max number of cores (including main core) 1.21 - -1 means infinity 1.22 - 1.23 - If more than one pdb chain for some sequence provided, consider all of them 1.24 - cost is calculated as 1 / (delta + 1) 1.25 - 1.26 - delta in [0, +inf) => cost in (0, 1] 1.27 - """ 1.28 - nodes = self.positions 1.29 - lines = {} 1.30 - for i in self.positions: 1.31 - for j in self.positions: 1.32 - if i < j: 1.33 - distances = [] 1.34 - for sequence in self.sequences: 1.35 - for chain in sequence.pdb_chains: 1.36 - m1 = self.alignment.body[sequence][i] 1.37 - m2 = self.alignment.body[sequence][j] 1.38 - if m1 and m2: 1.39 - r1 = sequence.pdb_residues[chain][m1] 1.40 - r2 = sequence.pdb_residues[chain][m2] 1.41 - ca1 = r1['CA'] 1.42 - ca2 = r2['CA'] 1.43 - d = ca1 - ca2 # Bio.PDB feature 1.44 - distances.append(d) 1.45 - if len(distances) >= 2: 1.46 - delta = max(distances) - min(distances) 1.47 - if delta <= max_delta: 1.48 - lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta) 1.49 - graph = 
Graph(nodes, lines) 1.50 - cliques = graph.cliques(timeout=timeout, minsize=minsize) 1.51 - GCs = [] 1.52 - for clique in cliques: 1.53 - for GC in GCs: 1.54 - if len(clique - set(GC.positions)) < ac_new_atoms * len(clique): 1.55 - break 1.56 - else: 1.57 - GCs.append(Block(self.alignment, self.sequences, clique)) 1.58 - if ac_count != -1 and len(GCs) >= ac_count: 1.59 - break 1.60 - return GCs 1.61 - 1.62 def xstring(self, x='X', gap='-'): 1.63 """ Returns string consisting of gap chars and chars x at self.positions 1.64
2.1 --- a/allpy/pdb.py Thu Dec 16 20:45:57 2010 +0300 2.2 +++ b/allpy/pdb.py Thu Dec 16 20:47:09 2010 +0300 2.3 @@ -230,6 +230,56 @@ 2.4 2.5 class Block(base.Block): 2.6 2.7 + def geometrical_cores(self, max_delta=config.delta, 2.8 + timeout=config.timeout, minsize=config.minsize, 2.9 + ac_new_atoms=config.ac_new_atoms, 2.10 + ac_count=config.ac_count): 2.11 + """ Returns length-sorted list of blocks, representing GCs 2.12 + 2.13 + * max_delta -- threshold of distance spreading 2.14 + * timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm) 2.15 + * minsize -- min size of each core 2.16 + * ac_new_atoms -- min part or new atoms in new alternative core 2.17 + current GC is compared with each of already selected GCs if 2.18 + difference is less then ac_new_atoms, current GC is skipped 2.19 + difference = part of new atoms in current core 2.20 + * ac_count -- max number of cores (including main core) 2.21 + -1 means infinity 2.22 + 2.23 + If more than one pdb chain for some sequence provided, consider all of them 2.24 + cost is calculated as 1 / (delta + 1) 2.25 + 2.26 + delta in [0, +inf) => cost in (0, 1] 2.27 + """ 2.28 + nodes = self.positions 2.29 + lines = {} 2.30 + for i in self.positions: 2.31 + for j in self.positions: 2.32 + if i < j: 2.33 + distances = [] 2.34 + for sequence in self.sequences: 2.35 + for chain in sequence.pdb_chains: 2.36 + m1 = self.alignment.body[sequence][i] 2.37 + m2 = self.alignment.body[sequence][j] 2.38 + if m1 and m2: 2.39 + r1 = sequence.pdb_residues[chain][m1] 2.40 + r2 = sequence.pdb_residues[chain][m2] 2.41 + ca1 = r1['CA'] 2.42 + ca2 = r2['CA'] 2.43 + d = ca1 - ca2 # Bio.PDB feature 2.44 + distances.append(d) 2.45 + if len(distances) >= 2: 2.46 + delta = max(distances) - min(distances) 2.47 + if delta <= max_delta: 2.48 + lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta) 2.49 + graph = Graph(nodes, lines) 2.50 + cliques = graph.cliques(timeout=timeout, minsize=minsize) 2.51 + GCs = [] 2.52 + for clique in cliques: 2.53 
+ for GC in GCs: 2.54 + if len(clique - set(GC.positions)) < ac_new_atoms * len(clique): 2.55 + break 2.56 + 2.57 def ca_atoms(self, sequence, pdb_chain): 2.58 """ Iterates Ca-atom of monomers of this sequence from this block """ 2.59 return (sequence.pdb_residues[pdb_chain][monomer] for monomer in self.monomers())