Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/annotate/20636c0df58c/allpy/pdb.py
Дата изменения: Unknown
Дата индексирования: Sun Mar 2 02:49:46 2014
Кодировка:
allpy: allpy/pdb.py annotate

allpy

annotate allpy/pdb.py @ 301:20636c0df58c

Removed controversial methods of allpy.base.Alignment: length, height, identity
author Daniil Alexeyevsky <me.dendik@gmail.com>
date Thu, 16 Dec 2010 18:56:21 +0300
parents 4a2341bc90b1
children 4d610c277281
rev   line source
bnagaev@147 1 """ Functions to get pdb information from fasta id
bnagaev@138 2 and to generate fasta id from pdb information
bnagaev@138 3
bnagaev@138 4 pdb information: code, chain, model
bnagaev@138 5
bnagaev@138 6 TODO: same for local pdb files
bnagaev@138 7 """
bnagaev@138 8
bnagaev@248 9 import re
me@274 10 import base
bnagaev@248 11 from Bio.PDB import PDBParser
me@274 12 from Bio.PDB import Superimposer, CaPPBuilder, PDBIO
bnagaev@248 13
bnagaev@248 14
bnagaev@79 15 # for pdb-codes
bnagaev@151 16 re1 = re.compile(r"(^|[^a-z0-9])(?P<code>[0-9][0-9a-z]{3})([^a-z0-9](?P<chain>[0-9a-z ]?)(?P<model>[^a-z0-9]([0-9]{1,3}))?)?", re.I)
bnagaev@79 17
bnagaev@138 18 #~ # for files
bnagaev@138 19 #~ re2 = re.compile(r"(^)([^^]+\.(ent|pdb))([^a-zA-Z0-9]([0-9A-Za-z ]?)([^a-zA-Z0-9]([0-9]{1,3}))?)?$")
bnagaev@79 20
bnagaev@138 21 def std_id(pdb_id, pdb_chain, pdb_model=None):
bnagaev@138 22 if pdb_model:
bnagaev@138 23 return "%s_%s_%s" % \
bnagaev@138 24 (pdb_id.lower().strip(), pdb_chain.upper().strip(), pdb_model)
bnagaev@138 25 else:
bnagaev@138 26 return "%s_%s" % \
bnagaev@138 27 (pdb_id.lower().strip(), pdb_chain.upper().strip())
me@270 28
bnagaev@138 29 def pdb_id_parse(ID):
bnagaev@139 30 match = re1.search(ID)
bnagaev@138 31 if not match:
bnagaev@138 32 return None
bnagaev@139 33 d = match.groupdict()
bnagaev@139 34 if 'chain' not in d or not d['chain']:
bnagaev@139 35 d['chain'] = ' '
bnagaev@139 36 if 'model' not in d or not d['model']:
bnagaev@139 37 d['model'] = 0
bnagaev@139 38 return d
me@270 39
me@270 40
bnagaev@156 41 def get_structure(file, name):
bnagaev@157 42 return PDBParser().get_structure(name, file)
me@270 43
bnagaev@138 44 #~ def std_id_parse(ID):
bnagaev@138 45 #~ """
bnagaev@138 46 #~ Parse standart ID to pdb_code, chain and model
bnagaev@138 47 #~ """
bnagaev@138 48 #~ if '.ent' in ID.lower() or '.pdb' in ID.lower():
bnagaev@138 49 #~ # it is file
bnagaev@138 50 #~ parseO = self.re2.search(ID) # files
bnagaev@138 51 #~ else:
bnagaev@138 52 #~ parseO = self.re1.search(ID.lower()) # pdb codes
bnagaev@138 53 #~ if not parseO:
bnagaev@138 54 #~ return None
bnagaev@138 55 #~ parse = parseO.groups()
bnagaev@138 56 #~ if len(parse) < 2:
bnagaev@138 57 #~ return None
bnagaev@138 58 #~ code = parse[1]
bnagaev@138 59 #~ chain = ''
bnagaev@138 60 #~ model = None
bnagaev@138 61 #~ if len(parse) >= 4:
bnagaev@138 62 #~ chain = parse[3]
bnagaev@138 63 #~ if chain:
bnagaev@138 64 #~ chain = chain.upper()
bnagaev@138 65 #~ if len(parse) >= 6:
bnagaev@138 66 #~ if parse[5]:
bnagaev@138 67 #~ model = parse[5]
bnagaev@138 68 #~ return code, chain, model
me@270 69
me@274 70 class Sequence(base.Sequence):
me@274 71 """Sequence with associated PDB structure.
me@274 72
me@274 73 Attributes:
me@274 74
me@274 75 * pdb_chain -- Bio.PDB.Chain
me@274 76 * pdb_file -- file object
me@274 77
me@274 78 * pdb_residues -- {Monomer: Bio.PDB.Residue}
me@274 79 * pdb_secstr -- {Monomer: 'Secondary structure'}
me@274 80 Code Secondary structure
me@274 81 H alpha-helix
me@274 82 B Isolated beta-bridge residue
me@274 83 E Strand
me@274 84 G 3-10 helix
me@274 85 I pi-helix
me@274 86 T Turn
me@274 87 S Bend
me@274 88 - Other
me@274 89
me@274 90
me@274 91 ?TODO: global pdb_structures
me@274 92 """
me@274 93
me@274 94 def __init__(self, *args, **kw):
me@274 95 self.pdb_chains = []
me@274 96 self.pdb_files = {}
me@274 97 self.pdb_residues = {}
me@274 98 self.pdb_secstr = {}
me@274 99
me@274 100 def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain, pdb_model=0):
me@274 101 """ Reads Pdb chain from file
me@274 102
me@274 103 and align each Monomer with PDB.Residue (TODO)
me@274 104 """
me@274 105 name = std_id(pdb_id, pdb_chain, pdb_model)
me@274 106 structure = get_structure(pdb_file, name)
me@274 107 chain = structure[pdb_model][pdb_chain]
me@274 108 self.pdb_chains.append(chain)
me@274 109 self.pdb_residues[chain] = {}
me@274 110 self.pdb_secstr[chain] = {}
me@274 111 pdb_sequence = Sequence.from_pdb_chain(chain)
me@274 112 a = alignment.Alignment.from_sequences(self, pdb_sequence)
me@274 113 a.muscle_align()
me@274 114 for monomer, pdb_monomer in a.column(sequence=pdb_sequence, original=self):
me@274 115 if pdb_sequence.pdb_has(chain, pdb_monomer):
me@274 116 residue = pdb_sequence.pdb_residues[chain][pdb_monomer]
me@274 117 self.pdb_residues[chain][monomer] = residue
me@274 118 self.pdb_files[chain] = pdb_file
me@274 119
me@274 120 def pdb_unload(self):
me@274 121 """ Delete all pdb-connected links """
me@274 122 #~ gc.get_referrers(self.pdb_chains[0])
me@274 123 self.pdb_chains = []
me@274 124 self.pdb_residues = {}
me@274 125 self.pdb_secstr = {} # FIXME
me@274 126 self.pdb_files = {} # FIXME
me@274 127
me@274 128 @staticmethod
me@274 129 def from_pdb_chain(chain):
me@274 130 """ Returns Sequence with Monomers with link to Bio.PDB.Residue
me@274 131
me@274 132 chain is Bio.PDB.Chain
me@274 133 """
me@274 134 cappbuilder = CaPPBuilder()
me@274 135 peptides = cappbuilder.build_peptides(chain)
me@274 136 sequence = Sequence()
me@274 137 sequence.pdb_chains = [chain]
me@274 138 sequence.pdb_residues[chain] = {}
me@274 139 sequence.pdb_secstr[chain] = {}
me@274 140 for peptide in peptides:
me@274 141 for ca_atom in peptide.get_ca_list():
me@274 142 residue = ca_atom.get_parent()
me@274 143 monomer = AminoAcidType.from_pdb_residue(residue).instance()
me@274 144 sequence.pdb_residues[chain][monomer] = residue
me@274 145 sequence.monomers.append(monomer)
me@274 146 return sequence
me@274 147
me@274 148 def pdb_auto_add(self, conformity_info=None, pdb_directory='./tmp'):
me@274 149 """ Adds pdb information to each monomer
me@274 150
me@274 151 Returns if information has been successfully added
me@274 152 TODO: conformity_file
me@274 153
me@274 154 id-format lava flow
me@274 155 """
me@274 156 if not conformity_info:
me@274 157 path = os.path.join(pdb_directory, self.name)
me@274 158 if os.path.exists(path) and os.path.getsize(path):
me@274 159 match = pdb_id_parse(self.name)
me@274 160 self.pdb_chain_add(open(path), match['code'],
me@274 161 match['chain'], match['model'])
me@274 162 else:
me@274 163 match = pdb_id_parse(self.name)
me@274 164 if match:
me@274 165 code = match['code']
me@274 166 pdb_filename = config.pdb_dir % code
me@274 167 if not os.path.exists(pdb_filename) or not os.path.getsize(pdb_filename):
me@274 168 url = config.pdb_url % code
me@274 169 print "Download %s" % url
me@274 170 pdb_file = open(pdb_filename, 'w')
me@274 171 data = urllib2.urlopen(url).read()
me@274 172 pdb_file.write(data)
me@274 173 pdb_file.close()
me@274 174 print "Save %s" % pdb_filename
me@274 175 pdb_file = open(pdb_filename)
me@274 176 self.pdb_chain_add(pdb_file, code, match['chain'], match['model'])
me@274 177
me@274 178 def pdb_save(self, out_filename, pdb_chain):
me@274 179 """ Saves pdb_chain to out_file """
me@274 180 class GlySelect(Select):
me@274 181 def accept_chain(self, chain):
me@274 182 if chain == pdb_chain:
me@274 183 return 1
me@274 184 else:
me@274 185 return 0
me@274 186 io = PDBIO()
me@274 187 structure = chain.get_parent()
me@274 188 io.set_structure(structure)
me@274 189 io.save(out_filename, GlySelect())
me@274 190
me@274 191
me@274 192 def pdb_add_sec_str(self, pdb_chain):
me@274 193 """ Add secondary structure data """
me@274 194 tmp_file = NamedTemporaryFile(delete=False)
me@274 195 tmp_file.close()
me@274 196 pdb_file = self.pdb_files[pdb_chain].name
me@274 197 os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name})
me@274 198 dssp, keys = make_dssp_dict(tmp_file.name)
me@274 199 for monomer in self.monomers:
me@274 200 if self.pdb_has(pdb_chain, monomer):
me@274 201 residue = self.pdb_residues[pdb_chain][monomer]
me@274 202 try:
me@274 203 d = dssp[(pdb_chain.get_id(), residue.get_id())]
me@274 204 self.pdb_secstr[pdb_chain][monomer] = d[1]
me@274 205 except:
me@274 206 print "No dssp information about %s at %s" % (monomer, pdb_chain)
me@274 207 os.unlink(tmp_file.name)
me@274 208
me@274 209 def pdb_has(self, chain, monomer):
me@274 210 return chain in self.pdb_residues and monomer in self.pdb_residues[chain]
me@274 211
me@275 212 def secstr_has(self, chain, monomer):
me@275 213 return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain]
me@275 214
me@296 215
me@296 216 class Alignment(base.Alignment):
me@296 217
me@296 218 def secstr(self, sequence, pdb_chain, gap='-'):
me@296 219 """ Returns string representing secondary structure """
me@296 220 return ''.join([
me@296 221 (sequence.pdb_secstr[pdb_chain][m] if sequence.secstr_has(pdb_chain, m) else gap)
me@296 222 for m in self.body[sequence]])
me@296 223
me@274 224 # vim: set ts=4 sts=4 sw=4 et: