Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/annotate/5e7851709c95/lib/block.py
Дата изменения: Unknown
Дата индексирования: Sun Mar 2 02:36:25 2014
Кодировка:
allpy: lib/block.py annotate

allpy

annotate lib/block.py @ 120:5e7851709c95

lib::block geometrical_core completed, not tested
author boris <bnagaev@gmail.com>
date Sat, 23 Oct 2010 22:45:35 +0400
parents fd7371f14c6d
children 0903179f8c34
rev   line source
BurkovBA@0 1 #!/usr/bin/python
BurkovBA@0 2
BurkovBA@0 3 import sys
BurkovBA@0 4
BurkovBA@0 5 import project
BurkovBA@0 6 import sequence
BurkovBA@0 7 import monomer
bnagaev@116 8 import config
BurkovBA@0 9
class Block(object):
    """
    A selection of sequences and alignment positions of a project.

    Mandatory data:
    * self.project -- project object, which the block belongs to
    * self.sequences -- set of sequence objects that contain monomers
      and/or gaps, that constitute the block
    * self.positions -- sorted list of positions of the project.alignment
      that are included in the block

    How to create a new block:
    >>> import project
    >>> import block
    >>> proj = project.Project(open("test.fasta"))
    >>> block1 = block.Block(proj)
    """

    def __init__(self, project, sequences=None, positions=None):
        """
        Build a new block from a project.

        project -- project object the block belongs to
        sequences -- sequences of the block; if None, project.sequences
            is used
        positions -- positions of the block; if None, every position
            0 .. len(project) - 1 is used
        """
        if sequences is None:
            sequences = project.sequences
        if positions is None:
            # Materialise the range so self.positions is a real list,
            # as the class docstring promises.
            positions = list(range(len(project)))
        self.project = project
        self.sequences = sequences
        self.positions = positions

    def save_fasta(self, out_file, long_line=60):
        """
        Write the block to the given file object in fasta format.

        out_file -- object with a write() method
        long_line -- sequence strings are split into substrings of this
            length, one per output line; pass None (or 0) to write each
            sequence on a single line

        No changes in the names, descriptions or order of the sequences
        are made. Falsy monomers (gaps) are written as '-'.
        """
        for seq in self.sequences:
            out_file.write(">%(name)s %(description)s \n" % seq.__dict__)
            alignment_monomers = self.project.alignment[seq]
            block_monomers = [alignment_monomers[i] for i in self.positions]
            string = ''.join([m.type.code1 if m else '-'
                              for m in block_monomers])
            if long_line:
                # Step through the string by long_line so that no spurious
                # blank line is written when its length is an exact
                # multiple of long_line. max(..., 1) keeps a single line
                # for an empty string, like the unsplit branch below.
                for start in range(0, max(len(string), 1), long_line):
                    out_file.write("%s \n" % string[start:start + long_line])
            else:
                out_file.write("%s \n" % string)

    def geometrical_core(self, max_delta=config.delta,
                         timeout=config.timeout, minsize=config.minsize):
        """
        Compute the geometrical core of the block.

        max_delta -- threshold of distance spreading: two positions are
            connected when the spread (max - min) of their distances over
            all considered pdb chains does not exceed it
        timeout, minsize -- passed through to Graph.cliques()

        Returns the result of Graph.cliques(); according to the original
        documentation this is a sorted list of positions representing
        the geometrical core.

        If more than one pdb chain for some sequence is provided, all of
        them are considered.
        The cost of a connection is calculated as 1 / (delta + 1):
        delta in [0, +inf) => cost in (0, 1]
        """
        # NOTE(review): Graph is not imported in the visible part of this
        # module; it has to be in scope for this method to run.
        nodes = self.positions
        lines = {}
        for i in self.positions:
            for j in self.positions:
                if i < j:
                    distances = []
                    for seq in self.sequences:
                        # The monomers do not depend on the chain, so look
                        # them up once per sequence.
                        # NOTE(review): save_fasta treats falsy monomers as
                        # gaps; a gap here would fail on .pdb_residues --
                        # confirm how gaps should be handled.
                        m1 = self.project.alignment[seq][i]
                        m2 = self.project.alignment[seq][j]
                        for chain in seq.pdb_chains:
                            d = m1.pdb_residues[chain] - m2.pdb_residues[chain]
                            distances.append(d)
                    if not distances:
                        # No pdb chain at all: nothing to compare, so the
                        # pair gets no connection (max() of an empty list
                        # would raise ValueError).
                        continue
                    delta = max(distances) - min(distances)
                    if delta <= max_delta:
                        # Cost depends on the measured spread, not on the
                        # threshold.
                        lines[Graph.line(i, j)] = 1.0 / (1.0 + delta)
        graph = Graph(nodes, lines)
        return graph.cliques(timeout=timeout, minsize=minsize)
bnagaev@116 87
bnagaev@116 88