rev |
line source |
bnagaev@147
|
1 """ Functions to get pdb information from fasta id |
bnagaev@138
|
2 and to generate fasta id from pdb information |
bnagaev@138
|
3 |
bnagaev@138
|
4 pdb information: code, chain, model |
bnagaev@138
|
5 |
bnagaev@138
|
6 TODO: same for local pdb files |
bnagaev@138
|
7 """ |
bnagaev@138
|
8 |
bnagaev@248
|
9 import re |
me@274
|
10 import base |
bnagaev@248
|
11 from Bio.PDB import PDBParser |
me@274
|
12 from Bio.PDB import Superimposer, CaPPBuilder, PDBIO |
bnagaev@248
|
13 |
bnagaev@248
|
14 |
bnagaev@79
|
15 # for pdb-codes |
bnagaev@151
|
16 re1 = re.compile(r"(^|[^a-z0-9])(?P<code>[0-9][0-9a-z]{3})([^a-z0-9](?P<chain>[0-9a-z ]?)(?P<model>[^a-z0-9]([0-9]{1,3}))?)?", re.I) |
bnagaev@79
|
17 |
bnagaev@138
|
18 #~ # for files |
bnagaev@138
|
19 #~ re2 = re.compile(r"(^)([^^]+\.(ent|pdb))([^a-zA-Z0-9]([0-9A-Za-z ]?)([^a-zA-Z0-9]([0-9]{1,3}))?)?$") |
bnagaev@79
|
20 |
bnagaev@138
|
21 def std_id(pdb_id, pdb_chain, pdb_model=None): |
bnagaev@138
|
22 if pdb_model: |
bnagaev@138
|
23 return "%s_%s_%s" % \ |
bnagaev@138
|
24 (pdb_id.lower().strip(), pdb_chain.upper().strip(), pdb_model) |
bnagaev@138
|
25 else: |
bnagaev@138
|
26 return "%s_%s" % \ |
bnagaev@138
|
27 (pdb_id.lower().strip(), pdb_chain.upper().strip()) |
me@270
|
28 |
bnagaev@138
|
29 def pdb_id_parse(ID): |
bnagaev@139
|
30 match = re1.search(ID) |
bnagaev@138
|
31 if not match: |
bnagaev@138
|
32 return None |
bnagaev@139
|
33 d = match.groupdict() |
bnagaev@139
|
34 if 'chain' not in d or not d['chain']: |
bnagaev@139
|
35 d['chain'] = ' ' |
bnagaev@139
|
36 if 'model' not in d or not d['model']: |
bnagaev@139
|
37 d['model'] = 0 |
bnagaev@139
|
38 return d |
me@270
|
39 |
me@270
|
40 |
bnagaev@156
|
41 def get_structure(file, name): |
bnagaev@157
|
42 return PDBParser().get_structure(name, file) |
me@270
|
43 |
bnagaev@138
|
44 #~ def std_id_parse(ID): |
bnagaev@138
|
45 #~ """ |
bnagaev@138
|
46 #~ Parse standart ID to pdb_code, chain and model |
bnagaev@138
|
47 #~ """ |
bnagaev@138
|
48 #~ if '.ent' in ID.lower() or '.pdb' in ID.lower(): |
bnagaev@138
|
49 #~ # it is file |
bnagaev@138
|
50 #~ parseO = self.re2.search(ID) # files |
bnagaev@138
|
51 #~ else: |
bnagaev@138
|
52 #~ parseO = self.re1.search(ID.lower()) # pdb codes |
bnagaev@138
|
53 #~ if not parseO: |
bnagaev@138
|
54 #~ return None |
bnagaev@138
|
55 #~ parse = parseO.groups() |
bnagaev@138
|
56 #~ if len(parse) < 2: |
bnagaev@138
|
57 #~ return None |
bnagaev@138
|
58 #~ code = parse[1] |
bnagaev@138
|
59 #~ chain = '' |
bnagaev@138
|
60 #~ model = None |
bnagaev@138
|
61 #~ if len(parse) >= 4: |
bnagaev@138
|
62 #~ chain = parse[3] |
bnagaev@138
|
63 #~ if chain: |
bnagaev@138
|
64 #~ chain = chain.upper() |
bnagaev@138
|
65 #~ if len(parse) >= 6: |
bnagaev@138
|
66 #~ if parse[5]: |
bnagaev@138
|
67 #~ model = parse[5] |
bnagaev@138
|
68 #~ return code, chain, model |
me@270
|
69 |
me@274
|
70 class Sequence(base.Sequence): |
me@274
|
71 """Sequence with associated PDB structure. |
me@274
|
72 |
me@274
|
73 Attributes: |
me@274
|
74 |
me@274
|
75 * pdb_chain -- Bio.PDB.Chain |
me@274
|
76 * pdb_file -- file object |
me@274
|
77 |
me@274
|
78 * pdb_residues -- {Monomer: Bio.PDB.Residue} |
me@274
|
79 * pdb_secstr -- {Monomer: 'Secondary structure'} |
me@274
|
80 Code Secondary structure |
me@274
|
81 H alpha-helix |
me@274
|
82 B Isolated beta-bridge residue |
me@274
|
83 E Strand |
me@274
|
84 G 3-10 helix |
me@274
|
85 I pi-helix |
me@274
|
86 T Turn |
me@274
|
87 S Bend |
me@274
|
88 - Other |
me@274
|
89 |
me@274
|
90 |
me@274
|
91 ?TODO: global pdb_structures |
me@274
|
92 """ |
me@274
|
93 |
me@274
|
94 def __init__(self, *args, **kw): |
me@274
|
95 self.pdb_chains = [] |
me@274
|
96 self.pdb_files = {} |
me@274
|
97 self.pdb_residues = {} |
me@274
|
98 self.pdb_secstr = {} |
me@274
|
99 |
me@274
|
100 def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain, pdb_model=0): |
me@274
|
101 """ Reads Pdb chain from file |
me@274
|
102 |
me@274
|
103 and align each Monomer with PDB.Residue (TODO) |
me@274
|
104 """ |
me@274
|
105 name = std_id(pdb_id, pdb_chain, pdb_model) |
me@274
|
106 structure = get_structure(pdb_file, name) |
me@274
|
107 chain = structure[pdb_model][pdb_chain] |
me@274
|
108 self.pdb_chains.append(chain) |
me@274
|
109 self.pdb_residues[chain] = {} |
me@274
|
110 self.pdb_secstr[chain] = {} |
me@274
|
111 pdb_sequence = Sequence.from_pdb_chain(chain) |
me@274
|
112 a = alignment.Alignment.from_sequences(self, pdb_sequence) |
me@274
|
113 a.muscle_align() |
me@274
|
114 for monomer, pdb_monomer in a.column(sequence=pdb_sequence, original=self): |
me@274
|
115 if pdb_sequence.pdb_has(chain, pdb_monomer): |
me@274
|
116 residue = pdb_sequence.pdb_residues[chain][pdb_monomer] |
me@274
|
117 self.pdb_residues[chain][monomer] = residue |
me@274
|
118 self.pdb_files[chain] = pdb_file |
me@274
|
119 |
me@274
|
120 def pdb_unload(self): |
me@274
|
121 """ Delete all pdb-connected links """ |
me@274
|
122 #~ gc.get_referrers(self.pdb_chains[0]) |
me@274
|
123 self.pdb_chains = [] |
me@274
|
124 self.pdb_residues = {} |
me@274
|
125 self.pdb_secstr = {} # FIXME |
me@274
|
126 self.pdb_files = {} # FIXME |
me@274
|
127 |
me@274
|
128 @staticmethod |
me@274
|
129 def from_pdb_chain(chain): |
me@274
|
130 """ Returns Sequence with Monomers with link to Bio.PDB.Residue |
me@274
|
131 |
me@274
|
132 chain is Bio.PDB.Chain |
me@274
|
133 """ |
me@274
|
134 cappbuilder = CaPPBuilder() |
me@274
|
135 peptides = cappbuilder.build_peptides(chain) |
me@274
|
136 sequence = Sequence() |
me@274
|
137 sequence.pdb_chains = [chain] |
me@274
|
138 sequence.pdb_residues[chain] = {} |
me@274
|
139 sequence.pdb_secstr[chain] = {} |
me@274
|
140 for peptide in peptides: |
me@274
|
141 for ca_atom in peptide.get_ca_list(): |
me@274
|
142 residue = ca_atom.get_parent() |
me@274
|
143 monomer = AminoAcidType.from_pdb_residue(residue).instance() |
me@274
|
144 sequence.pdb_residues[chain][monomer] = residue |
me@274
|
145 sequence.monomers.append(monomer) |
me@274
|
146 return sequence |
me@274
|
147 |
me@274
|
148 def pdb_auto_add(self, conformity_info=None, pdb_directory='./tmp'): |
me@274
|
149 """ Adds pdb information to each monomer |
me@274
|
150 |
me@274
|
151 Returns if information has been successfully added |
me@274
|
152 TODO: conformity_file |
me@274
|
153 |
me@274
|
154 id-format lava flow |
me@274
|
155 """ |
me@274
|
156 if not conformity_info: |
me@274
|
157 path = os.path.join(pdb_directory, self.name) |
me@274
|
158 if os.path.exists(path) and os.path.getsize(path): |
me@274
|
159 match = pdb_id_parse(self.name) |
me@274
|
160 self.pdb_chain_add(open(path), match['code'], |
me@274
|
161 match['chain'], match['model']) |
me@274
|
162 else: |
me@274
|
163 match = pdb_id_parse(self.name) |
me@274
|
164 if match: |
me@274
|
165 code = match['code'] |
me@274
|
166 pdb_filename = config.pdb_dir % code |
me@274
|
167 if not os.path.exists(pdb_filename) or not os.path.getsize(pdb_filename): |
me@274
|
168 url = config.pdb_url % code |
me@274
|
169 print "Download %s" % url |
me@274
|
170 pdb_file = open(pdb_filename, 'w') |
me@274
|
171 data = urllib2.urlopen(url).read() |
me@274
|
172 pdb_file.write(data) |
me@274
|
173 pdb_file.close() |
me@274
|
174 print "Save %s" % pdb_filename |
me@274
|
175 pdb_file = open(pdb_filename) |
me@274
|
176 self.pdb_chain_add(pdb_file, code, match['chain'], match['model']) |
me@274
|
177 |
me@274
|
178 def pdb_save(self, out_filename, pdb_chain): |
me@274
|
179 """ Saves pdb_chain to out_file """ |
me@274
|
180 class GlySelect(Select): |
me@274
|
181 def accept_chain(self, chain): |
me@274
|
182 if chain == pdb_chain: |
me@274
|
183 return 1 |
me@274
|
184 else: |
me@274
|
185 return 0 |
me@274
|
186 io = PDBIO() |
me@274
|
187 structure = chain.get_parent() |
me@274
|
188 io.set_structure(structure) |
me@274
|
189 io.save(out_filename, GlySelect()) |
me@274
|
190 |
me@274
|
191 |
me@274
|
192 def pdb_add_sec_str(self, pdb_chain): |
me@274
|
193 """ Add secondary structure data """ |
me@274
|
194 tmp_file = NamedTemporaryFile(delete=False) |
me@274
|
195 tmp_file.close() |
me@274
|
196 pdb_file = self.pdb_files[pdb_chain].name |
me@274
|
197 os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name}) |
me@274
|
198 dssp, keys = make_dssp_dict(tmp_file.name) |
me@274
|
199 for monomer in self.monomers: |
me@274
|
200 if self.pdb_has(pdb_chain, monomer): |
me@274
|
201 residue = self.pdb_residues[pdb_chain][monomer] |
me@274
|
202 try: |
me@274
|
203 d = dssp[(pdb_chain.get_id(), residue.get_id())] |
me@274
|
204 self.pdb_secstr[pdb_chain][monomer] = d[1] |
me@274
|
205 except: |
me@274
|
206 print "No dssp information about %s at %s" % (monomer, pdb_chain) |
me@274
|
207 os.unlink(tmp_file.name) |
me@274
|
208 |
me@274
|
209 def pdb_has(self, chain, monomer): |
me@274
|
210 return chain in self.pdb_residues and monomer in self.pdb_residues[chain] |
me@274
|
211 |
me@275
|
212 def secstr_has(self, chain, monomer): |
me@275
|
213 return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain] |
me@275
|
214 |
me@296
|
215 |
me@296
|
216 class Alignment(base.Alignment): |
me@296
|
217 |
me@296
|
218 def secstr(self, sequence, pdb_chain, gap='-'): |
me@296
|
219 """ Returns string representing secondary structure """ |
me@296
|
220 return ''.join([ |
me@296
|
221 (sequence.pdb_secstr[pdb_chain][m] if sequence.secstr_has(pdb_chain, m) else gap) |
me@296
|
222 for m in self.body[sequence]]) |
me@296
|
223 |
me@274
|
224 # vim: set ts=4 sts=4 sw=4 et: |