rev |
line source |
BurkovBA@0
|
1 #!usr/bin/python |
BurkovBA@0
|
2 |
BurkovBA@0
|
3 import sys |
BurkovBA@0
|
4 |
BurkovBA@0
|
5 import project |
BurkovBA@0
|
6 import sequence |
BurkovBA@0
|
7 import monomer |
bnagaev@116
|
8 import config |
bnagaev@121
|
9 from graph import Graph |
bnagaev@124
|
10 from copy import copy |
BurkovBA@0
|
11 |
BurkovBA@0
|
12 class Block(object): |
BurkovBA@0
|
13 """ |
BurkovBA@0
|
14 Mandatory data: |
BurkovBA@0
|
15 * self.project -- project object, which the block belongs to |
BurkovBA@1
|
16 * self.sequences - set of sequence objects that contain monomers |
BurkovBA@0
|
17 and/or gaps, that constitute the block |
bnagaev@115
|
18 * self.positions -- sorted list of positions of the project.alignment that |
BurkovBA@1
|
19 are included in the block |
bnagaev@116
|
20 |
BurkovBA@0
|
21 How to create a new block: |
BurkovBA@0
|
22 >>> import project |
BurkovBA@0
|
23 >>> import block |
BurkovBA@0
|
24 >>> proj = project.Project(open("test.fasta")) |
bnagaev@114
|
25 >>> block1 = block.Block(proj) |
BurkovBA@0
|
26 """ |
BurkovBA@0
|
27 |
bnagaev@112
|
28 def __init__(self, project, sequences=None, positions=None): |
bnagaev@112
|
29 """ |
bnagaev@112
|
30 Builds new block from project |
bnagaev@112
|
31 if sequences==None, all sequences are used |
bnagaev@112
|
32 if positions==None, all positions are used |
bnagaev@112
|
33 """ |
bnagaev@112
|
34 if sequences == None: |
bnagaev@112
|
35 sequences = project.sequences |
bnagaev@112
|
36 if positions == None: |
bnagaev@112
|
37 positions = range(len(project)) |
BurkovBA@73
|
38 self.project = project |
BurkovBA@73
|
39 self.sequences = sequences |
BurkovBA@73
|
40 self.positions = positions |
BurkovBA@0
|
41 |
bnagaev@112
|
42 def save_fasta(self, out_file, long_line=60): |
bnagaev@112
|
43 """ |
bnagaev@112
|
44 Saves alignment to given file in fasta-format |
bnagaev@112
|
45 Splits long lines to substrings of length=long_line |
bnagaev@112
|
46 To prevent this, set long_line=None |
bnagaev@112
|
47 |
BurkovBA@0
|
48 No changes in the names, descriptions or order of the sequences |
BurkovBA@0
|
49 are made. |
BurkovBA@0
|
50 """ |
BurkovBA@0
|
51 for sequence in self.sequences: |
bnagaev@112
|
52 out_file.write(">%(name)s %(description)s \n" % sequence.__dict__) |
bnagaev@113
|
53 alignment_monomers = self.project.alignment[sequence] |
bnagaev@115
|
54 block_monomers = [alignment_monomers[i] for i in self.positions] |
bnagaev@113
|
55 string = ''.join([m.type.code1 if m else '-' for m in block_monomers]) |
bnagaev@112
|
56 if long_line: |
bnagaev@112
|
57 for i in range(0, len(string) // long_line + 1): |
bnagaev@112
|
58 out_file.write("%s \n" % string[i*long_line : i*long_line + long_line]) |
bnagaev@112
|
59 else: |
bnagaev@112
|
60 out_file.write("%s \n" % string) |
bnagaev@116
|
61 |
bnagaev@120
|
62 def geometrical_core(self, max_delta=config.delta, |
bnagaev@120
|
63 timeout=config.timeout, minsize=config.minsize): |
bnagaev@116
|
64 """ |
bnagaev@124
|
65 returns new block, representing geometrical core |
bnagaev@124
|
66 |
bnagaev@116
|
67 delta -- threshold of distance spreading |
bnagaev@120
|
68 |
bnagaev@120
|
69 If more than one pdb chain for some sequence provided, consider all of them |
bnagaev@120
|
70 cost is calculated as 1 / (delta + 1) |
bnagaev@120
|
71 delta in [0, +inf) => cost in (0, 1] |
bnagaev@116
|
72 """ |
bnagaev@117
|
73 nodes = self.positions |
bnagaev@117
|
74 lines = {} |
bnagaev@116
|
75 for i in self.positions: |
bnagaev@116
|
76 for j in self.positions: |
bnagaev@117
|
77 if i < j: |
bnagaev@120
|
78 distances = [] |
bnagaev@120
|
79 for sequence in self.sequences: |
bnagaev@120
|
80 for chain in sequence.pdb_chains: |
bnagaev@120
|
81 m1 = self.project.alignment[sequence][i] |
bnagaev@120
|
82 m2 = self.project.alignment[sequence][j] |
bnagaev@122
|
83 if m1 and m2: |
bnagaev@122
|
84 ca1 = m1.pdb_residues[chain]['CA'] |
bnagaev@122
|
85 ca2 = m2.pdb_residues[chain]['CA'] |
bnagaev@122
|
86 d = ca1 - ca2 # Bio.PDB feature |
bnagaev@122
|
87 distances.append(d) |
bnagaev@122
|
88 if len(distances) >= 2: |
bnagaev@122
|
89 delta = max(distances) - min(distances) |
bnagaev@122
|
90 if delta <= max_delta: |
bnagaev@122
|
91 lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta) |
bnagaev@120
|
92 graph = Graph(nodes, lines) |
bnagaev@124
|
93 positions = graph.cliques(timeout=timeout, minsize=minsize) |
bnagaev@124
|
94 return Block(self.project, copy(self.sequences), positions) |
bnagaev@123
|
95 |
bnagaev@123
|
96 def xstring(self, x): |
bnagaev@123
|
97 """ |
bnagaev@123
|
98 Returns string consisting of '-' and chars x at self.positions |
bnagaev@123
|
99 Length of returning string = length of project |
bnagaev@123
|
100 """ |
bnagaev@123
|
101 monomers = [False] * len(self.project) |
bnagaev@123
|
102 for i in self.positions: |
bnagaev@123
|
103 monomers[i] = True |
bnagaev@123
|
104 return ''.join([x if m else '-' for m in monomers]) |