Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/e5c8deea6f83/allpy/pdb.py
Дата изменения: Unknown
Дата индексирования: Sun Feb 3 18:09:25 2013
Кодировка:
allpy: e5c8deea6f83 allpy/pdb.py

allpy

view allpy/pdb.py @ 304:e5c8deea6f83

Added docstrings to allpy.util
author Daniil Alexeyevsky <me.dendik@gmail.com>
date Thu, 16 Dec 2010 19:15:34 +0300
parents 4a2341bc90b1
children 4d610c277281
line source
1 """ Functions to get pdb information from fasta id
2 and to generate fasta id from pdb information
4 pdb information: code, chain, model
6 TODO: same for local pdb files
7 """
9 import re
10 import base
11 from Bio.PDB import PDBParser
12 from Bio.PDB import Superimposer, CaPPBuilder, PDBIO
15 # for pdb-codes
16 re1 = re.compile(r"(^|[^a-z0-9])(?P<code>[0-9][0-9a-z]{3})([^a-z0-9](?P<chain>[0-9a-z ]?)(?P<model>[^a-z0-9]([0-9]{1,3}))?)?", re.I)
18 #~ # for files
19 #~ re2 = re.compile(r"(^)([^^]+\.(ent|pdb))([^a-zA-Z0-9]([0-9A-Za-z ]?)([^a-zA-Z0-9]([0-9]{1,3}))?)?$")
21 def std_id(pdb_id, pdb_chain, pdb_model=None):
22 if pdb_model:
23 return "%s_%s_%s" % \
24 (pdb_id.lower().strip(), pdb_chain.upper().strip(), pdb_model)
25 else:
26 return "%s_%s" % \
27 (pdb_id.lower().strip(), pdb_chain.upper().strip())
29 def pdb_id_parse(ID):
30 match = re1.search(ID)
31 if not match:
32 return None
33 d = match.groupdict()
34 if 'chain' not in d or not d['chain']:
35 d['chain'] = ' '
36 if 'model' not in d or not d['model']:
37 d['model'] = 0
38 return d
41 def get_structure(file, name):
42 return PDBParser().get_structure(name, file)
44 #~ def std_id_parse(ID):
45 #~ """
46 #~ Parse standart ID to pdb_code, chain and model
47 #~ """
48 #~ if '.ent' in ID.lower() or '.pdb' in ID.lower():
49 #~ # it is file
50 #~ parseO = self.re2.search(ID) # files
51 #~ else:
52 #~ parseO = self.re1.search(ID.lower()) # pdb codes
53 #~ if not parseO:
54 #~ return None
55 #~ parse = parseO.groups()
56 #~ if len(parse) < 2:
57 #~ return None
58 #~ code = parse[1]
59 #~ chain = ''
60 #~ model = None
61 #~ if len(parse) >= 4:
62 #~ chain = parse[3]
63 #~ if chain:
64 #~ chain = chain.upper()
65 #~ if len(parse) >= 6:
66 #~ if parse[5]:
67 #~ model = parse[5]
68 #~ return code, chain, model
70 class Sequence(base.Sequence):
71 """Sequence with associated PDB structure.
73 Attributes:
75 * pdb_chain -- Bio.PDB.Chain
76 * pdb_file -- file object
78 * pdb_residues -- {Monomer: Bio.PDB.Residue}
79 * pdb_secstr -- {Monomer: 'Secondary structure'}
80 Code Secondary structure
81 H alpha-helix
82 B Isolated beta-bridge residue
83 E Strand
84 G 3-10 helix
85 I pi-helix
86 T Turn
87 S Bend
88 - Other
91 ?TODO: global pdb_structures
92 """
94 def __init__(self, *args, **kw):
95 self.pdb_chains = []
96 self.pdb_files = {}
97 self.pdb_residues = {}
98 self.pdb_secstr = {}
100 def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain, pdb_model=0):
101 """ Reads Pdb chain from file
103 and align each Monomer with PDB.Residue (TODO)
104 """
105 name = std_id(pdb_id, pdb_chain, pdb_model)
106 structure = get_structure(pdb_file, name)
107 chain = structure[pdb_model][pdb_chain]
108 self.pdb_chains.append(chain)
109 self.pdb_residues[chain] = {}
110 self.pdb_secstr[chain] = {}
111 pdb_sequence = Sequence.from_pdb_chain(chain)
112 a = alignment.Alignment.from_sequences(self, pdb_sequence)
113 a.muscle_align()
114 for monomer, pdb_monomer in a.column(sequence=pdb_sequence, original=self):
115 if pdb_sequence.pdb_has(chain, pdb_monomer):
116 residue = pdb_sequence.pdb_residues[chain][pdb_monomer]
117 self.pdb_residues[chain][monomer] = residue
118 self.pdb_files[chain] = pdb_file
120 def pdb_unload(self):
121 """ Delete all pdb-connected links """
122 #~ gc.get_referrers(self.pdb_chains[0])
123 self.pdb_chains = []
124 self.pdb_residues = {}
125 self.pdb_secstr = {} # FIXME
126 self.pdb_files = {} # FIXME
128 @staticmethod
129 def from_pdb_chain(chain):
130 """ Returns Sequence with Monomers with link to Bio.PDB.Residue
132 chain is Bio.PDB.Chain
133 """
134 cappbuilder = CaPPBuilder()
135 peptides = cappbuilder.build_peptides(chain)
136 sequence = Sequence()
137 sequence.pdb_chains = [chain]
138 sequence.pdb_residues[chain] = {}
139 sequence.pdb_secstr[chain] = {}
140 for peptide in peptides:
141 for ca_atom in peptide.get_ca_list():
142 residue = ca_atom.get_parent()
143 monomer = AminoAcidType.from_pdb_residue(residue).instance()
144 sequence.pdb_residues[chain][monomer] = residue
145 sequence.monomers.append(monomer)
146 return sequence
148 def pdb_auto_add(self, conformity_info=None, pdb_directory='./tmp'):
149 """ Adds pdb information to each monomer
151 Returns if information has been successfully added
152 TODO: conformity_file
154 id-format lava flow
155 """
156 if not conformity_info:
157 path = os.path.join(pdb_directory, self.name)
158 if os.path.exists(path) and os.path.getsize(path):
159 match = pdb_id_parse(self.name)
160 self.pdb_chain_add(open(path), match['code'],
161 match['chain'], match['model'])
162 else:
163 match = pdb_id_parse(self.name)
164 if match:
165 code = match['code']
166 pdb_filename = config.pdb_dir % code
167 if not os.path.exists(pdb_filename) or not os.path.getsize(pdb_filename):
168 url = config.pdb_url % code
169 print "Download %s" % url
170 pdb_file = open(pdb_filename, 'w')
171 data = urllib2.urlopen(url).read()
172 pdb_file.write(data)
173 pdb_file.close()
174 print "Save %s" % pdb_filename
175 pdb_file = open(pdb_filename)
176 self.pdb_chain_add(pdb_file, code, match['chain'], match['model'])
178 def pdb_save(self, out_filename, pdb_chain):
179 """ Saves pdb_chain to out_file """
180 class GlySelect(Select):
181 def accept_chain(self, chain):
182 if chain == pdb_chain:
183 return 1
184 else:
185 return 0
186 io = PDBIO()
187 structure = chain.get_parent()
188 io.set_structure(structure)
189 io.save(out_filename, GlySelect())
192 def pdb_add_sec_str(self, pdb_chain):
193 """ Add secondary structure data """
194 tmp_file = NamedTemporaryFile(delete=False)
195 tmp_file.close()
196 pdb_file = self.pdb_files[pdb_chain].name
197 os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name})
198 dssp, keys = make_dssp_dict(tmp_file.name)
199 for monomer in self.monomers:
200 if self.pdb_has(pdb_chain, monomer):
201 residue = self.pdb_residues[pdb_chain][monomer]
202 try:
203 d = dssp[(pdb_chain.get_id(), residue.get_id())]
204 self.pdb_secstr[pdb_chain][monomer] = d[1]
205 except:
206 print "No dssp information about %s at %s" % (monomer, pdb_chain)
207 os.unlink(tmp_file.name)
209 def pdb_has(self, chain, monomer):
210 return chain in self.pdb_residues and monomer in self.pdb_residues[chain]
212 def secstr_has(self, chain, monomer):
213 return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain]
216 class Alignment(base.Alignment):
218 def secstr(self, sequence, pdb_chain, gap='-'):
219 """ Returns string representing secondary structure """
220 return ''.join([
221 (sequence.pdb_secstr[pdb_chain][m] if sequence.secstr_has(pdb_chain, m) else gap)
222 for m in self.body[sequence]])
224 # vim: set ts=4 sts=4 sw=4 et: