Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/annotate/4b7a31ae5faa/lib/block.py
Дата изменения: Unknown
Дата индексирования: Sun Mar 2 02:36:57 2014
Кодировка:
allpy: lib/block.py annotate

allpy

annotate lib/block.py @ 132:4b7a31ae5faa

lib.block -- copy of sequences --> link to old sequences, when create from block
author boris <bnagaev@gmail.com>
date Sun, 24 Oct 2010 11:42:21 +0400
parents d591f3caf6fb
children 425d94783375
rev   line source
BurkovBA@0 1 #!usr/bin/python
BurkovBA@0 2
BurkovBA@0 3 import sys
BurkovBA@0 4
BurkovBA@0 5 import project
BurkovBA@0 6 import sequence
BurkovBA@0 7 import monomer
bnagaev@116 8 import config
bnagaev@121 9 from graph import Graph
bnagaev@129 10 from copy import copy
BurkovBA@0 11
BurkovBA@0 12 class Block(object):
BurkovBA@0 13 """
BurkovBA@0 14 Mandatory data:
BurkovBA@0 15 * self.project -- project object, which the block belongs to
BurkovBA@1 16 * self.sequences - set of sequence objects that contain monomers
BurkovBA@0 17 and/or gaps, that constitute the block
bnagaev@115 18 * self.positions -- sorted list of positions of the project.alignment that
BurkovBA@1 19 are included in the block
bnagaev@116 20
bnagaev@132 21 Don't change self.sequences -- it may be a link to other block.sequences
bnagaev@132 22
BurkovBA@0 23 How to create a new block:
BurkovBA@0 24 >>> import project
BurkovBA@0 25 >>> import block
BurkovBA@0 26 >>> proj = project.Project(open("test.fasta"))
bnagaev@114 27 >>> block1 = block.Block(proj)
BurkovBA@0 28 """
BurkovBA@0 29
bnagaev@112 30 def __init__(self, project, sequences=None, positions=None):
bnagaev@112 31 """
bnagaev@112 32 Builds new block from project
bnagaev@112 33 if sequences==None, all sequences are used
bnagaev@112 34 if positions==None, all positions are used
bnagaev@112 35 """
bnagaev@112 36 if sequences == None:
bnagaev@132 37 sequences = set(project.sequences) # copy
bnagaev@112 38 if positions == None:
bnagaev@112 39 positions = range(len(project))
BurkovBA@73 40 self.project = project
BurkovBA@73 41 self.sequences = sequences
BurkovBA@73 42 self.positions = positions
BurkovBA@0 43
bnagaev@112 44 def save_fasta(self, out_file, long_line=60):
bnagaev@112 45 """
bnagaev@112 46 Saves alignment to given file in fasta-format
bnagaev@112 47 Splits long lines to substrings of length=long_line
bnagaev@112 48 To prevent this, set long_line=None
bnagaev@112 49
BurkovBA@0 50 No changes in the names, descriptions or order of the sequences
BurkovBA@0 51 are made.
BurkovBA@0 52 """
BurkovBA@0 53 for sequence in self.sequences:
bnagaev@112 54 out_file.write(">%(name)s %(description)s \n" % sequence.__dict__)
bnagaev@113 55 alignment_monomers = self.project.alignment[sequence]
bnagaev@115 56 block_monomers = [alignment_monomers[i] for i in self.positions]
bnagaev@113 57 string = ''.join([m.type.code1 if m else '-' for m in block_monomers])
bnagaev@112 58 if long_line:
bnagaev@112 59 for i in range(0, len(string) // long_line + 1):
bnagaev@112 60 out_file.write("%s \n" % string[i*long_line : i*long_line + long_line])
bnagaev@112 61 else:
bnagaev@112 62 out_file.write("%s \n" % string)
bnagaev@116 63
bnagaev@127 64 def geometrical_cores(self, max_delta=config.delta,
bnagaev@129 65 timeout=config.timeout, minsize=config.minsize,
bnagaev@129 66 ac_new_atoms=config.ac_new_atoms,
bnagaev@129 67 ac_count=config.ac_count):
bnagaev@116 68 """
bnagaev@129 69 returns length-sorted list of blocks, representing GCs
bnagaev@126 70
bnagaev@129 71 max_delta -- threshold of distance spreading
bnagaev@129 72 timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
bnagaev@129 73 minsize -- min size of each core
bnagaev@129 74 ac_new_atoms -- min part or new atoms in new alternative core
bnagaev@129 75 current GC is compared with each of already selected GCs
bnagaev@129 76 if difference is less then ac_new_atoms, current GC is skipped
bnagaev@129 77 difference = part of new atoms in current core
bnagaev@129 78 ac_count -- max number of cores (including main core)
bnagaev@130 79 -1 means infinity
bnagaev@120 80 If more than one pdb chain for some sequence provided, consider all of them
bnagaev@120 81 cost is calculated as 1 / (delta + 1)
bnagaev@120 82 delta in [0, +inf) => cost in (0, 1]
bnagaev@116 83 """
bnagaev@117 84 nodes = self.positions
bnagaev@117 85 lines = {}
bnagaev@116 86 for i in self.positions:
bnagaev@116 87 for j in self.positions:
bnagaev@117 88 if i < j:
bnagaev@120 89 distances = []
bnagaev@120 90 for sequence in self.sequences:
bnagaev@120 91 for chain in sequence.pdb_chains:
bnagaev@120 92 m1 = self.project.alignment[sequence][i]
bnagaev@120 93 m2 = self.project.alignment[sequence][j]
bnagaev@122 94 if m1 and m2:
bnagaev@122 95 ca1 = m1.pdb_residues[chain]['CA']
bnagaev@122 96 ca2 = m2.pdb_residues[chain]['CA']
bnagaev@122 97 d = ca1 - ca2 # Bio.PDB feature
bnagaev@122 98 distances.append(d)
bnagaev@122 99 if len(distances) >= 2:
bnagaev@122 100 delta = max(distances) - min(distances)
bnagaev@122 101 if delta <= max_delta:
bnagaev@122 102 lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta)
bnagaev@120 103 graph = Graph(nodes, lines)
bnagaev@129 104 cliques = graph.cliques(timeout=timeout, minsize=minsize)
bnagaev@129 105 GCs = []
bnagaev@129 106 for clique in cliques:
bnagaev@129 107 for GC in GCs:
bnagaev@129 108 if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):
bnagaev@129 109 break
bnagaev@129 110 else:
bnagaev@132 111 GCs.append(Block(self.project, self.sequences, clique))
bnagaev@130 112 if ac_count != -1 and len(GCs) >= ac_count:
bnagaev@129 113 break
bnagaev@129 114 return GCs
bnagaev@123 115
bnagaev@128 116 def xstring(self, x='X'):
bnagaev@123 117 """
bnagaev@123 118 Returns string consisting of '-' and chars x at self.positions
bnagaev@123 119 Length of returning string = length of project
bnagaev@123 120 """
bnagaev@123 121 monomers = [False] * len(self.project)
bnagaev@123 122 for i in self.positions:
bnagaev@123 123 monomers[i] = True
bnagaev@123 124 return ''.join([x if m else '-' for m in monomers])