| rev |
line source |
|
BurkovBA@0
|
1 #!usr/bin/python |
|
BurkovBA@0
|
2 |
|
BurkovBA@0
|
3 import sys |
|
BurkovBA@0
|
4 |
|
BurkovBA@0
|
5 import project |
|
BurkovBA@0
|
6 import sequence |
|
BurkovBA@0
|
7 import monomer |
|
bnagaev@116
|
8 import config |
|
bnagaev@121
|
9 from graph import Graph |
|
bnagaev@146
|
10 from Bio.PDB import Superimposer |
|
bnagaev@149
|
11 from tempfile import NamedTemporaryFile |
|
bnagaev@149
|
12 import os |
|
bnagaev@151
|
13 from fasta import save_fasta |
|
BurkovBA@0
|
14 |
|
BurkovBA@0
|
15 class Block(object): |
|
bnagaev@147
|
16 """ Block of alignment |
|
bnagaev@147
|
17 |
|
BurkovBA@0
|
18 Mandatory data: |
|
BurkovBA@0
|
19 * self.project -- project object, which the block belongs to |
|
BurkovBA@1
|
20 * self.sequences - set of sequence objects that contain monomers |
|
BurkovBA@0
|
21 and/or gaps, that constitute the block |
|
bnagaev@115
|
22 * self.positions -- sorted list of positions of the project.alignment that |
|
BurkovBA@1
|
23 are included in the block |
|
bnagaev@116
|
24 |
|
bnagaev@132
|
25 Don't change self.sequences -- it may be a link to other block.sequences |
|
bnagaev@132
|
26 |
|
BurkovBA@0
|
27 How to create a new block: |
|
BurkovBA@0
|
28 >>> import project |
|
BurkovBA@0
|
29 >>> import block |
|
BurkovBA@0
|
30 >>> proj = project.Project(open("test.fasta")) |
|
bnagaev@114
|
31 >>> block1 = block.Block(proj) |
|
BurkovBA@0
|
32 """ |
|
BurkovBA@0
|
33 |
|
bnagaev@112
|
34 def __init__(self, project, sequences=None, positions=None): |
|
bnagaev@147
|
35 """ Builds new block from project |
|
bnagaev@147
|
36 |
|
bnagaev@112
|
37 if sequences==None, all sequences are used |
|
bnagaev@112
|
38 if positions==None, all positions are used |
|
bnagaev@112
|
39 """ |
|
bnagaev@112
|
40 if sequences == None: |
|
bnagaev@132
|
41 sequences = set(project.sequences) # copy |
|
bnagaev@112
|
42 if positions == None: |
|
bnagaev@112
|
43 positions = range(len(project)) |
|
BurkovBA@73
|
44 self.project = project |
|
BurkovBA@73
|
45 self.sequences = sequences |
|
BurkovBA@73
|
46 self.positions = positions |
|
bnagaev@146
|
47 |
|
bnagaev@151
|
48 def save_fasta(self, out_file, long_line=70, gap='-'): |
|
bnagaev@148
|
49 """ Saves alignment to given file in fasta-format |
|
bnagaev@148
|
50 |
|
BurkovBA@0
|
51 No changes in the names, descriptions or order of the sequences |
|
BurkovBA@0
|
52 are made. |
|
BurkovBA@0
|
53 """ |
|
BurkovBA@0
|
54 for sequence in self.sequences: |
|
bnagaev@113
|
55 alignment_monomers = self.project.alignment[sequence] |
|
bnagaev@115
|
56 block_monomers = [alignment_monomers[i] for i in self.positions] |
|
bnagaev@113
|
57 string = ''.join([m.type.code1 if m else '-' for m in block_monomers]) |
|
bnagaev@151
|
58 save_fasta(out_file, string, sequence.name, sequence.description, long_line) |
|
bnagaev@116
|
59 |
|
bnagaev@127
|
60 def geometrical_cores(self, max_delta=config.delta, |
|
bnagaev@129
|
61 timeout=config.timeout, minsize=config.minsize, |
|
bnagaev@129
|
62 ac_new_atoms=config.ac_new_atoms, |
|
bnagaev@129
|
63 ac_count=config.ac_count): |
|
bnagaev@150
|
64 """ Returns length-sorted list of blocks, representing GCs |
|
bnagaev@126
|
65 |
|
bnagaev@129
|
66 max_delta -- threshold of distance spreading |
|
bnagaev@129
|
67 timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm) |
|
bnagaev@129
|
68 minsize -- min size of each core |
|
bnagaev@129
|
69 ac_new_atoms -- min part or new atoms in new alternative core |
|
bnagaev@129
|
70 current GC is compared with each of already selected GCs |
|
bnagaev@129
|
71 if difference is less then ac_new_atoms, current GC is skipped |
|
bnagaev@129
|
72 difference = part of new atoms in current core |
|
bnagaev@129
|
73 ac_count -- max number of cores (including main core) |
|
bnagaev@130
|
74 -1 means infinity |
|
bnagaev@120
|
75 If more than one pdb chain for some sequence provided, consider all of them |
|
bnagaev@120
|
76 cost is calculated as 1 / (delta + 1) |
|
bnagaev@120
|
77 delta in [0, +inf) => cost in (0, 1] |
|
bnagaev@116
|
78 """ |
|
bnagaev@117
|
79 nodes = self.positions |
|
|