Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/annotate/f3706f0b40ae/lib/block.py
Дата изменения: Unknown
Дата индексирования: Sun Mar 2 02:39:59 2014
Кодировка:
allpy: lib/block.py annotate

allpy

annotate lib/block.py @ 169:f3706f0b40ae

fix
author boris (netbook) <bnagaev@gmail.com>
date Tue, 02 Nov 2010 19:04:38 +0300
parents 675b402094be
children 854e33626cbe
rev   line source
BurkovBA@0 1 #!usr/bin/python
BurkovBA@0 2
BurkovBA@0 3 import sys
BurkovBA@0 4
BurkovBA@0 5 import project
BurkovBA@0 6 import sequence
BurkovBA@0 7 import monomer
bnagaev@116 8 import config
bnagaev@121 9 from graph import Graph
bnagaev@146 10 from Bio.PDB import Superimposer
bnagaev@149 11 from tempfile import NamedTemporaryFile
bnagaev@149 12 import os
bnagaev@151 13 from fasta import save_fasta
BurkovBA@0 14
BurkovBA@0 15 class Block(object):
bnagaev@147 16 """ Block of alignment
bnagaev@147 17
BurkovBA@0 18 Mandatory data:
BurkovBA@0 19 * self.project -- project object, which the block belongs to
BurkovBA@1 20 * self.sequences - set of sequence objects that contain monomers
BurkovBA@0 21 and/or gaps, that constitute the block
bnagaev@115 22 * self.positions -- sorted list of positions of the project.alignment that
BurkovBA@1 23 are included in the block
bnagaev@116 24
bnagaev@132 25 Don't change self.sequences -- it may be a link to other block.sequences
bnagaev@132 26
BurkovBA@0 27 How to create a new block:
BurkovBA@0 28 >>> import project
BurkovBA@0 29 >>> import block
BurkovBA@0 30 >>> proj = project.Project(open("test.fasta"))
bnagaev@114 31 >>> block1 = block.Block(proj)
BurkovBA@0 32 """
BurkovBA@0 33
bnagaev@112 34 def __init__(self, project, sequences=None, positions=None):
bnagaev@147 35 """ Builds new block from project
bnagaev@147 36
bnagaev@112 37 if sequences==None, all sequences are used
bnagaev@112 38 if positions==None, all positions are used
bnagaev@112 39 """
bnagaev@112 40 if sequences == None:
bnagaev@132 41 sequences = set(project.sequences) # copy
bnagaev@112 42 if positions == None:
bnagaev@112 43 positions = range(len(project))
BurkovBA@73 44 self.project = project
BurkovBA@73 45 self.sequences = sequences
BurkovBA@73 46 self.positions = positions
bnagaev@146 47
bnagaev@151 48 def save_fasta(self, out_file, long_line=70, gap='-'):
bnagaev@148 49 """ Saves alignment to given file in fasta-format
bnagaev@148 50
BurkovBA@0 51 No changes in the names, descriptions or order of the sequences
BurkovBA@0 52 are made.
BurkovBA@0 53 """
BurkovBA@0 54 for sequence in self.sequences:
bnagaev@113 55 alignment_monomers = self.project.alignment[sequence]
bnagaev@115 56 block_monomers = [alignment_monomers[i] for i in self.positions]
bnagaev@113 57 string = ''.join([m.type.code1 if m else '-' for m in block_monomers])
bnagaev@151 58 save_fasta(out_file, string, sequence.name, sequence.description, long_line)
bnagaev@116 59
bnagaev@127 60 def geometrical_cores(self, max_delta=config.delta,
bnagaev@129 61 timeout=config.timeout, minsize=config.minsize,
bnagaev@129 62 ac_new_atoms=config.ac_new_atoms,
bnagaev@129 63 ac_count=config.ac_count):
bnagaev@150 64 """ Returns length-sorted list of blocks, representing GCs
bnagaev@126 65
bnagaev@129 66 max_delta -- threshold of distance spreading
bnagaev@129 67 timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
bnagaev@129 68 minsize -- min size of each core
bnagaev@129 69 ac_new_atoms -- min part or new atoms in new alternative core
bnagaev@129 70 current GC is compared with each of already selected GCs
bnagaev@129 71 if difference is less then ac_new_atoms, current GC is skipped
bnagaev@129 72 difference = part of new atoms in current core
bnagaev@129 73 ac_count -- max number of cores (including main core)
bnagaev@130 74 -1 means infinity
bnagaev@120 75 If more than one pdb chain for some sequence provided, consider all of them
bnagaev@120 76 cost is calculated as 1 / (delta + 1)
bnagaev@120 77 delta in [0, +inf) => cost in (0, 1]
bnagaev@116 78 """
bnagaev@117 79 nodes = self.positions