Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/annotate/08494fb2c47a/lib/block.py
Дата изменения: Unknown
Дата индексирования: Sun Mar 2 02:37:30 2014
Кодировка:
allpy: lib/block.py annotate

allpy

annotate lib/block.py @ 190:08494fb2c47a

repeats: visitors for visiting tree items (almost) completed (tree from 10 Nov 2010 of https://docs1.google.com/document/d/1-7kCo7cdYTpHOtPXGMdSG9wxoXh6sIvrNyYyZuCKECs/edit?hl=en#)
author boris (netbook) <bnagaev@gmail.com>
date Thu, 18 Nov 2010 00:01:07 +0300
parents 45573ee48844
children
rev   line source
BurkovBA@0 1 #!usr/bin/python
BurkovBA@0 2
BurkovBA@0 3 import sys
BurkovBA@0 4
bnagaev@175 5 import alignment
BurkovBA@0 6 import sequence
BurkovBA@0 7 import monomer
bnagaev@116 8 import config
bnagaev@121 9 from graph import Graph
bnagaev@146 10 from Bio.PDB import Superimposer
bnagaev@149 11 from tempfile import NamedTemporaryFile
bnagaev@149 12 import os
bnagaev@151 13 from fasta import save_fasta
BurkovBA@0 14
BurkovBA@0 15 class Block(object):
bnagaev@147 16 """ Block of alignment
bnagaev@147 17
BurkovBA@0 18 Mandatory data:
bnagaev@175 19 * self.alignment -- alignment object, which the block belongs to
BurkovBA@1 20 * self.sequences - set of sequence objects that contain monomers
BurkovBA@0 21 and/or gaps, that constitute the block
bnagaev@175 22 * self.positions -- list of positions of the alignment.body that
bnagaev@172 23 are included in the block; position[i+1] is always to the right from position[i]
bnagaev@116 24
bnagaev@132 25 Don't change self.sequences -- it may be a link to other block.sequences
bnagaev@132 26
BurkovBA@0 27 How to create a new block:
bnagaev@175 28 >>> import alignment
BurkovBA@0 29 >>> import block
bnagaev@175 30 >>> proj = alignment.Alignment(open("test.fasta"))
bnagaev@114 31 >>> block1 = block.Block(proj)
BurkovBA@0 32 """
BurkovBA@0 33
bnagaev@175 34 def __init__(self, alignment, sequences=None, positions=None):
bnagaev@175 35 """ Builds new block from alignment
bnagaev@147 36
bnagaev@112 37 if sequences==None, all sequences are used
bnagaev@112 38 if positions==None, all positions are used
bnagaev@112 39 """
bnagaev@112 40 if sequences == None:
bnagaev@175 41 sequences = set(alignment.sequences) # copy
bnagaev@112 42 if positions == None:
bnagaev@175 43 positions = range(len(alignment))
bnagaev@175 44 self.alignment = alignment
BurkovBA@73 45 self.sequences = sequences
BurkovBA@73 46 self.positions = positions
bnagaev@146 47
bnagaev@151 48 def save_fasta(self, out_file, long_line=70, gap='-'):
bnagaev@148 49 """ Saves alignment to given file in fasta-format
bnagaev@148 50
BurkovBA@0 51 No changes in the names, descriptions or order of the sequences
BurkovBA@0 52 are made.
BurkovBA@0 53 """
BurkovBA@0 54 for sequence in self.sequences:
bnagaev@175 55 alignment_monomers = self.alignment.body[sequence]
bnagaev@115 56 block_monomers = [alignment_monomers[i] for i in self.positions]
bnagaev@113 57 string = ''.join([m.type.code1 if m else '-' for m in block_monomers])
bnagaev@151 58 save_fasta(out_file, string, sequence.name, sequence.description, long_line)
bnagaev@116 59
bnagaev@127 60 def geometrical_cores(self, max_delta=config.delta,
bnagaev@129 61 timeout=config.timeout, minsize=config.minsize,
bnagaev@129 62 ac_new_atoms=config.ac_new_atoms,
bnagaev@129 63 ac_count=config.ac_count):
bnagaev@150 64 """ Returns length-sorted list of blocks, representing GCs
bnagaev@126 65
bnagaev@129 66 max_delta -- threshold of distance spreading
bnagaev@129 67 timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
bnagaev@129 68 minsize -- min size of each core
bnagaev@129 69 ac_new_atoms -- min part or new atoms in new alternative core
bnagaev@129 70 current GC is compared with each of already selected GCs
bnagaev@129 71 if difference is less then ac_new_atoms, current GC is skipped
bnagaev@129 72 difference = part of new atoms in current core
bnagaev@129 73 ac_count -- max number of cores (including main core)
bnagaev@130 74 -1 means infinity
bnagaev@120 75 If more than one pdb chain for some sequence provided, consider all of them
bnagaev@120 76 cost is calculated as 1 / (delta + 1)
bnagaev@120 77 delta in [0, +inf) => cost in (0, 1]
bnagaev@116 78 """
bnagaev@117 79 nodes = self.positions
bnagaev@117 80 lines = {}
bnagaev@116 81 for i in self.positions:
bnagaev@116 82 for j in self.positions:
bnagaev@117 83 if i < j:
bnagaev@120 84 distances = []
bnagaev@120 85 for sequence in self.sequences:
bnagaev@120 86 for chain in sequence.pdb_chains:
bnagaev@175 87 m1 = self.alignment.body[sequence][i]
bnagaev@175 88 m2 = self.alignment.body[sequence][j]
bnagaev@122 89 if m1 and m2:
bnagaev@153 90 r1 = sequence.pdb_residues[chain][m1]
bnagaev@153 91 r2 = sequence.pdb_residues[chain][m2]
bnagaev@153 92 ca1 = r1['CA']
bnagaev@153 93 ca2 = r2['CA']
bnagaev@122 94 d = ca1 - ca2 # Bio.PDB feature
bnagaev@122 95 distances.append(d)
bnagaev@122 96 if len(distances) >= 2:
bnagaev@122 97 delta = max(distances) - min(distances)
bnagaev@122 98 if delta <= max_delta:
bnagaev@122 99 lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta)
bnagaev@120 100 graph = Graph(nodes, lines)
bnagaev@129 101 cliques = graph.cliques(timeout=timeout, minsize=minsize)
bnagaev@129 102 GCs = []
bnagaev@129 103 for clique in cliques:
bnagaev@129 104 for GC in GCs:
bnagaev@129 105 if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):
bnagaev@129 106 break
bnagaev@129 107 else:
bnagaev@175 108 GCs.append(Block(self.alignment, self.sequences, clique))
bnagaev@130 109 if ac_count != -1 and len(GCs) >= ac_count:
bnagaev@129 110 break
bnagaev@129 111 return GCs
bnagaev@123 112
bnagaev@137 113 def xstring(self, x='X', gap='-'):
bnagaev@148 114 """ Returns string consisting of gap chars and chars x at self.positions
bnagaev@148 115
bnagaev@175 116 Length of returning string = length of alignment
bnagaev@123 117 """
bnagaev@175 118 monomers = [False] * len(self.alignment)
bnagaev@123 119 for i in self.positions:
bnagaev@123 120 monomers[i] = True
bnagaev@137 121 return ''.join([x if m else gap for m in monomers])
bnagaev@134 122
bnagaev@151 123 def save_xstring(self, out_file, name, description='', x='X', gap='-', long_line=70):
bnagaev@148 124 """ Save xstring and name in fasta format """
bnagaev@151 125 save_fasta(out_file, self.xstring(x=x, gap=gap), name, description, long_line)
bnagaev@146 126
bnagaev@146 127 def monomers(self, sequence):
bnagaev@146 128 """ Iterates monomers of this sequence from this block """
bnagaev@175 129 alignment_sequence = self.alignment.body[sequence]
bnagaev@146 130 return (alignment_sequence[i] for i in self.positions)
bnagaev@146 131
bnagaev@146 132 def ca_atoms(self, sequence, pdb_chain):
bnagaev@146 133 """ Iterates Ca-atom of monomers of this sequence from this block """
bnagaev@153 134 return (sequence.pdb_residues[pdb_chain][monomer] for monomer in self.monomers())
bnagaev@146 135
bnagaev@146 136 def sequences_chains(self):
bnagaev@146 137 """ Iterates pairs (sequence, chain) """
bnagaev@175 138 for sequence in self.alignment.sequences:
bnagaev@151 139 if sequence in self.sequences:
bnagaev@151 140 for chain in sequence.pdb_chains:
bnagaev@151 141 yield (sequence, chain)
bnagaev@146 142
bnagaev@146 143 def superimpose(self):
bnagaev@146 144 """ Superimpose all pdb_chains in this block """
bnagaev@149 145 sequences_chains = list(self.sequences_chains())
bnagaev@149 146 if len(sequences_chains) >= 1:
bnagaev@146 147 sup = Superimposer()
bnagaev@146 148 fixed_sequence, fixed_chain = sequences_chains.pop()
bnagaev@146 149 fixed_atoms = self.ca_atoms(fixed_sequence, fixed_chain)
bnagaev@146 150 for sequence, chain in sequences_chains:
bnagaev@146 151 moving_atoms = self.ca_atoms(sequence, chain)
bnagaev@146 152 sup.set_atoms(fixed_atoms, moving_atoms)
bnagaev@146 153 # Apply rotation/translation to the moving atoms
bnagaev@146 154 sup.apply(moving_atoms)
bnagaev@146 155
bnagaev@146 156 def pdb_save(self, out_file):
bnagaev@149 157 """ Save all sequences
bnagaev@149 158
bnagaev@149 159 Returns {(sequence, chain): CHAIN}
bnagaev@149 160 CHAIN is chain letter in new file
bnagaev@149 161 """
bnagaev@149 162 tmp_file = NamedTemporaryFile(delete=False)
bnagaev@149 163 tmp_file.close()
bnagaev@149 164
bnagaev@149 165 for sequence, chain in self.sequences_chains():
bnagaev@149 166 sequence.pdb_save(tmp_file.name, chain)
bnagaev@149 167 # TODO: read from tmp_file.name
bnagaev@149 168 # change CHAIN
bnagaev@149 169 # add to out_file
bnagaev@149 170
bnagaev@149 171 os.unlink(NamedTemporaryFile)
bnagaev@146 172