allpy
diff allpy/block.py @ 261:d60628e29b24
Moved contents of block.py to base.py without any changes.
author | Daniil Alexeyevsky <me.dendik@gmail.com> |
---|---|
date | Tue, 14 Dec 2010 21:02:49 +0300 |
parents | 73f9779491ef |
children |
line diff
1.1 --- a/allpy/block.py Tue Dec 14 20:51:53 2010 +0300 1.2 +++ b/allpy/block.py Tue Dec 14 21:02:49 2010 +0300 1.3 @@ -1,172 +1,2 @@ 1.4 #!usr/bin/python 1.5 1.6 -import sys 1.7 - 1.8 -import alignment 1.9 -import sequence 1.10 -import monomer 1.11 -import config 1.12 -from graph import Graph 1.13 -from Bio.PDB import Superimposer 1.14 -from tempfile import NamedTemporaryFile 1.15 -import os 1.16 -from fasta import save_fasta 1.17 - 1.18 -class Block(object): 1.19 - """ Block of alignment 1.20 - 1.21 - Mandatory data: 1.22 - * self.alignment -- alignment object, which the block belongs to 1.23 - * self.sequences - set of sequence objects that contain monomers 1.24 - and/or gaps, that constitute the block 1.25 - * self.positions -- list of positions of the alignment.body that 1.26 - are included in the block; position[i+1] is always to the right from position[i] 1.27 - 1.28 - Don't change self.sequences -- it may be a link to other block.sequences 1.29 - 1.30 - How to create a new block: 1.31 - >>> import alignment 1.32 - >>> import block 1.33 - >>> proj = alignment.Alignment(open("test.fasta")) 1.34 - >>> block1 = block.Block(proj) 1.35 - """ 1.36 - 1.37 - def __init__(self, alignment, sequences=None, positions=None): 1.38 - """ Builds new block from alignment 1.39 - 1.40 - if sequences==None, all sequences are used 1.41 - if positions==None, all positions are used 1.42 - """ 1.43 - if sequences == None: 1.44 - sequences = set(alignment.sequences) # copy 1.45 - if positions == None: 1.46 - positions = range(len(alignment)) 1.47 - self.alignment = alignment 1.48 - self.sequences = sequences 1.49 - self.positions = positions 1.50 - 1.51 - def save_fasta(self, out_file, long_line=70, gap='-'): 1.52 - """ Saves alignment to given file in fasta-format 1.53 - 1.54 - No changes in the names, descriptions or order of the sequences 1.55 - are made. 1.56 - """ 1.57 - for sequence in self.sequences: 1.58 - alignment_monomers = self.alignment.body[sequence] 1.59 - block_monomers = [alignment_monomers[i] for i in self.positions] 1.60 - string = ''.join([m.type.code1 if m else '-' for m in block_monomers]) 1.61 - save_fasta(out_file, string, sequence.name, sequence.description, long_line) 1.62 - 1.63 - def geometrical_cores(self, max_delta=config.delta, 1.64 - timeout=config.timeout, minsize=config.minsize, 1.65 - ac_new_atoms=config.ac_new_atoms, 1.66 - ac_count=config.ac_count): 1.67 - """ Returns length-sorted list of blocks, representing GCs 1.68 - 1.69 - max_delta -- threshold of distance spreading 1.70 - timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm) 1.71 - minsize -- min size of each core 1.72 - ac_new_atoms -- min part or new atoms in new alternative core 1.73 - current GC is compared with each of already selected GCs 1.74 - if difference is less then ac_new_atoms, current GC is skipped 1.75 - difference = part of new atoms in current core 1.76 - ac_count -- max number of cores (including main core) 1.77 - -1 means infinity 1.78 - If more than one pdb chain for some sequence provided, consider all of them 1.79 - cost is calculated as 1 / (delta + 1) 1.80 - delta in [0, +inf) => cost in (0, 1] 1.81 - """ 1.82 - nodes = self.positions 1.83 - lines = {} 1.84 - for i in self.positions: 1.85 - for j in self.positions: 1.86 - if i < j: 1.87 - distances = [] 1.88 - for sequence in self.sequences: 1.89 - for chain in sequence.pdb_chains: 1.90 - m1 = self.alignment.body[sequence][i] 1.91 - m2 = self.alignment.body[sequence][j] 1.92 - if m1 and m2: 1.93 - r1 = sequence.pdb_residues[chain][m1] 1.94 - r2 = sequence.pdb_residues[chain][m2] 1.95 - ca1 = r1['CA'] 1.96 - ca2 = r2['CA'] 1.97 - d = ca1 - ca2 # Bio.PDB feature 1.98 - distances.append(d) 1.99 - if len(distances) >= 2: 1.100 - delta = max(distances) - min(distances) 1.101 - if delta <= max_delta: 1.102 - lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta) 1.103 - graph = Graph(nodes, lines) 1.104 - cliques = graph.cliques(timeout=timeout, minsize=minsize) 1.105 - GCs = [] 1.106 - for clique in cliques: 1.107 - for GC in GCs: 1.108 - if len(clique - set(GC.positions)) < ac_new_atoms * len(clique): 1.109 - break 1.110 - else: 1.111 - GCs.append(Block(self.alignment, self.sequences, clique)) 1.112 - if ac_count != -1 and len(GCs) >= ac_count: 1.113 - break 1.114 - return GCs 1.115 - 1.116 - def xstring(self, x='X', gap='-'): 1.117 - """ Returns string consisting of gap chars and chars x at self.positions 1.118 - 1.119 - Length of returning string = length of alignment 1.120 - """ 1.121 - monomers = [False] * len(self.alignment) 1.122 - for i in self.positions: 1.123 - monomers[i] = True 1.124 - return ''.join([x if m else gap for m in monomers]) 1.125 - 1.126 - def save_xstring(self, out_file, name, description='', x='X', gap='-', long_line=70): 1.127 - """ Save xstring and name in fasta format """ 1.128 - save_fasta(out_file, self.xstring(x=x, gap=gap), name, description, long_line) 1.129 - 1.130 - def monomers(self, sequence): 1.131 - """ Iterates monomers of this sequence from this block """ 1.132 - alignment_sequence = self.alignment.body[sequence] 1.133 - return (alignment_sequence[i] for i in self.positions) 1.134 - 1.135 - def ca_atoms(self, sequence, pdb_chain): 1.136 - """ Iterates Ca-atom of monomers of this sequence from this block """ 1.137 - return (sequence.pdb_residues[pdb_chain][monomer] for monomer in self.monomers()) 1.138 - 1.139 - def sequences_chains(self): 1.140 - """ Iterates pairs (sequence, chain) """ 1.141 - for sequence in self.alignment.sequences: 1.142 - if sequence in self.sequences: 1.143 - for chain in sequence.pdb_chains: 1.144 - yield (sequence, chain) 1.145 - 1.146 - def superimpose(self): 1.147 - """ Superimpose all pdb_chains in this block """ 1.148 - sequences_chains = list(self.sequences_chains()) 1.149 - if len(sequences_chains) >= 1: 1.150 - sup = Superimposer() 1.151 - fixed_sequence, fixed_chain = sequences_chains.pop() 1.152 - fixed_atoms = self.ca_atoms(fixed_sequence, fixed_chain) 1.153 - for sequence, chain in sequences_chains: 1.154 - moving_atoms = self.ca_atoms(sequence, chain) 1.155 - sup.set_atoms(fixed_atoms, moving_atoms) 1.156 - # Apply rotation/translation to the moving atoms 1.157 - sup.apply(moving_atoms) 1.158 - 1.159 - def pdb_save(self, out_file): 1.160 - """ Save all sequences 1.161 - 1.162 - Returns {(sequence, chain): CHAIN} 1.163 - CHAIN is chain letter in new file 1.164 - """ 1.165 - tmp_file = NamedTemporaryFile(delete=False) 1.166 - tmp_file.close() 1.167 - 1.168 - for sequence, chain in self.sequences_chains(): 1.169 - sequence.pdb_save(tmp_file.name, chain) 1.170 - # TODO: read from tmp_file.name 1.171 - # change CHAIN 1.172 - # add to out_file 1.173 - 1.174 - os.unlink(NamedTemporaryFile) 1.175 -