Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/annotate/f7dead025719/lib/block.py
Дата изменения: Unknown
Дата индексирования: Sun Mar 2 02:36:41 2014
Кодировка:
allpy: lib/block.py annotate

allpy

annotate lib/block.py @ 150:f7dead025719

documentation improvements
author boris (netbook) <bnagaev@gmail.com>
date Mon, 25 Oct 2010 13:30:11 +0400
parents 85fc264975a2
children 675b402094be
rev   line source
BurkovBA@0 1 #!usr/bin/python
BurkovBA@0 2
BurkovBA@0 3 import sys
BurkovBA@0 4
BurkovBA@0 5 import project
BurkovBA@0 6 import sequence
BurkovBA@0 7 import monomer
bnagaev@116 8 import config
bnagaev@121 9 from graph import Graph
bnagaev@146 10 from Bio.PDB import Superimposer
bnagaev@149 11 from tempfile import NamedTemporaryFile
bnagaev@149 12 import os
BurkovBA@0 13
BurkovBA@0 14 class Block(object):
bnagaev@147 15 """ Block of alignment
bnagaev@147 16
BurkovBA@0 17 Mandatory data:
BurkovBA@0 18 * self.project -- project object, which the block belongs to
BurkovBA@1 19 * self.sequences - set of sequence objects that contain monomers
BurkovBA@0 20 and/or gaps, that constitute the block
bnagaev@115 21 * self.positions -- sorted list of positions of the project.alignment that
BurkovBA@1 22 are included in the block
bnagaev@116 23
bnagaev@132 24 Don't change self.sequences -- it may be a link to other block.sequences
bnagaev@132 25
BurkovBA@0 26 How to create a new block:
BurkovBA@0 27 >>> import project
BurkovBA@0 28 >>> import block
BurkovBA@0 29 >>> proj = project.Project(open("test.fasta"))
bnagaev@114 30 >>> block1 = block.Block(proj)
BurkovBA@0 31 """
BurkovBA@0 32
bnagaev@112 33 def __init__(self, project, sequences=None, positions=None):
bnagaev@147 34 """ Builds new block from project
bnagaev@147 35
bnagaev@112 36 if sequences==None, all sequences are used
bnagaev@112 37 if positions==None, all positions are used
bnagaev@112 38 """
bnagaev@112 39 if sequences == None:
bnagaev@132 40 sequences = set(project.sequences) # copy
bnagaev@112 41 if positions == None:
bnagaev@112 42 positions = range(len(project))
BurkovBA@73 43 self.project = project
BurkovBA@73 44 self.sequences = sequences
BurkovBA@73 45 self.positions = positions
bnagaev@146 46
bnagaev@137 47 def save_fasta(self, out_file, long_line=60, gap='-'):
bnagaev@148 48 """ Saves alignment to given file in fasta-format
bnagaev@148 49
bnagaev@112 50 Splits long lines to substrings of length=long_line
bnagaev@112 51 To prevent this, set long_line=None
bnagaev@112 52
BurkovBA@0 53 No changes in the names, descriptions or order of the sequences
BurkovBA@0 54 are made.
BurkovBA@0 55 """
BurkovBA@0 56 for sequence in self.sequences:
bnagaev@112 57 out_file.write(">%(name)s %(description)s \n" % sequence.__dict__)
bnagaev@113 58 alignment_monomers = self.project.alignment[sequence]
bnagaev@115 59 block_monomers = [alignment_monomers[i] for i in self.positions]
bnagaev@113 60 string = ''.join([m.type.code1 if m else '-' for m in block_monomers])
bnagaev@112 61 if long_line:
bnagaev@112 62 for i in range(0, len(string) // long_line + 1):
bnagaev@112 63 out_file.write("%s \n" % string[i*long_line : i*long_line + long_line])
bnagaev@112 64 else:
bnagaev@112 65 out_file.write("%s \n" % string)
bnagaev@116 66
bnagaev@127 67 def geometrical_cores(self, max_delta=config.delta,
bnagaev@129 68 timeout=config.timeout, minsize=config.minsize,
bnagaev@129 69 ac_new_atoms=config.ac_new_atoms,
bnagaev@129 70 ac_count=config.ac_count):
bnagaev@150 71 """ Returns length-sorted list of blocks, representing GCs
bnagaev@126 72
bnagaev@129 73 max_delta -- threshold of distance spreading
bnagaev@129 74 timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
bnagaev@129 75 minsize -- min size of each core
bnagaev@129 76 ac_new_atoms -- min part or new atoms in new alternative core
bnagaev@129 77 current GC is compared with each of already selected GCs
bnagaev@129 78 if difference is less then ac_new_atoms, current GC is skipped
bnagaev@129 79 difference = part of new atoms in current core
bnagaev@129 80 ac_count -- max number of cores (including main core)
bnagaev@130 81 -1 means infinity
bnagaev@120 82 If more than one pdb chain for some sequence provided, consider all of them
bnagaev@120 83 cost is calculated as 1 / (delta + 1)
bnagaev@120 84 delta in [0, +inf) => cost in (0, 1]
bnagaev@116 85 """
bnagaev@117 86 nodes = self.positions
bnagaev@117 87 lines = {}
bnagaev@116 88 for i in self.positions:
bnagaev@116 89 for j in self.positions:
bnagaev@117 90 if i < j:
bnagaev@120 91 distances = []
bnagaev@120 92 for sequence in self.sequences:
bnagaev@120 93 for chain in sequence.pdb_chains:
bnagaev@120 94 m1 = self.project.alignment[sequence][i]
bnagaev@120 95 m2 = self.project.alignment[sequence][j]
bnagaev@122 96 if m1 and m2:
bnagaev@122 97 ca1 = m1.pdb_residues[chain]['CA']
bnagaev@122 98 ca2 = m2.pdb_residues[chain]['CA']
bnagaev@122 99 d = ca1 - ca2 # Bio.PDB feature
bnagaev@122 100 distances.append(d)
bnagaev@122 101 if len(distances) >= 2:
bnagaev@122 102 delta = max(distances) - min(distances)
bnagaev@122 103 if delta <= max_delta:
bnagaev@122 104 lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta)
bnagaev@120 105 graph = Graph(nodes, lines)
bnagaev@129 106 cliques = graph.cliques(timeout=timeout, minsize=minsize)
bnagaev@129 107 GCs = []
bnagaev@129 108 for clique in cliques:
bnagaev@129 109 for GC in GCs:
bnagaev@129 110 if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):
bnagaev@129 111 break
bnagaev@129 112 else:
bnagaev@132 113 GCs.append(Block(self.project, self.sequences, clique))
bnagaev@130 114 if ac_count != -1 and len(GCs) >= ac_count:
bnagaev@129 115 break
bnagaev@129 116 return GCs
bnagaev@123 117
bnagaev@137 118 def xstring(self, x='X', gap='-'):
bnagaev@148 119 """ Returns string consisting of gap chars and chars x at self.positions
bnagaev@148 120
bnagaev@123 121 Length of returning string = length of project
bnagaev@123 122 """
bnagaev@123 123 monomers = [False] * len(self.project)
bnagaev@123 124 for i in self.positions:
bnagaev@123 125 monomers[i] = True
bnagaev@137 126 return ''.join([x if m else gap for m in monomers])
bnagaev@134 127
bnagaev@137 128 def save_xstring(self, out_file, name, description='', x='X', gap='-'):
bnagaev@148 129 """ Save xstring and name in fasta format """
bnagaev@134 130 out_file.write(">%(name)s %(description)s \n" % \
bnagaev@134 131 {'name':name, 'description':description})
bnagaev@134 132
bnagaev@142 133 out_file.write("%(xstring)s \n" % {'xstring':self.xstring(x=x, gap=gap)})
bnagaev@146 134
bnagaev@146 135 def monomers(self, sequence):
bnagaev@146 136 """ Iterates monomers of this sequence from this block """
bnagaev@146 137 alignment_sequence = self.project.alignment[sequence]
bnagaev@146 138 return (alignment_sequence[i] for i in self.positions)
bnagaev@146 139
bnagaev@146 140 def ca_atoms(self, sequence, pdb_chain):
bnagaev@146 141 """ Iterates Ca-atom of monomers of this sequence from this block """
bnagaev@146 142 return (monomer.pdb_residues[pdb_chain] for monomer in self.monomers())
bnagaev@146 143
bnagaev@146 144 def sequences_chains(self):
bnagaev@146 145 """ Iterates pairs (sequence, chain) """
bnagaev@146 146 for sequence in self.sequences:
bnagaev@146 147 for chain in sequence.pdb_chains:
bnagaev@146 148 yield (sequence, chain)
bnagaev@146 149
bnagaev@146 150 def superimpose(self):
bnagaev@146 151 """ Superimpose all pdb_chains in this block """
bnagaev@149 152 sequences_chains = list(self.sequences_chains())
bnagaev@149 153 if len(sequences_chains) >= 1:
bnagaev@146 154 sup = Superimposer()
bnagaev@146 155 fixed_sequence, fixed_chain = sequences_chains.pop()
bnagaev@146 156 fixed_atoms = self.ca_atoms(fixed_sequence, fixed_chain)
bnagaev@146 157 for sequence, chain in sequences_chains:
bnagaev@146 158 moving_atoms = self.ca_atoms(sequence, chain)
bnagaev@146 159 sup.set_atoms(fixed_atoms, moving_atoms)
bnagaev@146 160 # Apply rotation/translation to the moving atoms
bnagaev@146 161 sup.apply(moving_atoms)
bnagaev@146 162
bnagaev@146 163 def pdb_save(self, out_file):
bnagaev@149 164 """ Save all sequences
bnagaev@149 165
bnagaev@149 166 Returns {(sequence, chain): CHAIN}
bnagaev@149 167 CHAIN is chain letter in new file
bnagaev@149 168 """
bnagaev@149 169 tmp_file = NamedTemporaryFile(delete=False)
bnagaev@149 170 tmp_file.close()
bnagaev@149 171
bnagaev@149 172 for sequence, chain in self.sequences_chains():
bnagaev@149 173 sequence.pdb_save(tmp_file.name, chain)
bnagaev@149 174 # TODO: read from tmp_file.name
bnagaev@149 175 # change CHAIN
bnagaev@149 176 # add to out_file
bnagaev@149 177
bnagaev@149 178 os.unlink(NamedTemporaryFile)
bnagaev@146 179
bnagaev@146 180