allpy
changeset 139:57c923c2e333
sequence.pdb_auto_add
fix allpy_pdb.pdb_id_parse and re1
author | boris <bnagaev@gmail.com> |
---|---|
date | Sun, 24 Oct 2010 18:12:21 +0400 |
parents | 32b9f4fadd35 |
children | e10310ed076c |
files | geometrical_core/geometrical_core.py lib/allpy_pdb.py lib/config.py lib/project.py lib/sequence.py |
diffstat | 5 files changed, 56 insertions(+), 17 deletions(-) [+] |
line diff
1.1 --- a/geometrical_core/geometrical_core.py Sun Oct 24 17:29:59 2010 +0400 1.2 +++ b/geometrical_core/geometrical_core.py Sun Oct 24 18:12:21 2010 +0400 1.3 @@ -118,7 +118,7 @@ 1.4 exit() 1.5 1.6 project = Project(args.i) 1.7 - project 1.8 + project.pdb_auto_add(args.c) 1.9 1.10 except Exception, t: 1.11 print t
2.1 --- a/lib/allpy_pdb.py Sun Oct 24 17:29:59 2010 +0400 2.2 +++ b/lib/allpy_pdb.py Sun Oct 24 18:12:21 2010 +0400 2.3 @@ -11,7 +11,7 @@ 2.4 """ 2.5 2.6 # for pdb-codes 2.7 -re1 = re.compile(r"(^|[^a-z0-9])(?P<code>[0-9][0-9a-z]{3})([^a-z0-9](?P<chain>[0-9a-z ]?)(?P<model>[^a-z0-9]([0-9]{1,3}))?)?($|[^a-z0-9])") 2.8 +re1 = re.compile(r"(^|[^a-z0-9])(?P<code>[0-9][0-9a-z]{3})([^a-z0-9](?P<chain>[0-9a-z ]?)(?P<model>[^a-z0-9]([0-9]{1,3}))?)?($|[^a-z0-9])", re.I) 2.9 2.10 #~ # for files 2.11 #~ re2 = re.compile(r"(^)([^^]+\.(ent|pdb))([^a-zA-Z0-9]([0-9A-Za-z ]?)([^a-zA-Z0-9]([0-9]{1,3}))?)?$") 2.12 @@ -25,11 +25,15 @@ 2.13 (pdb_id.lower().strip(), pdb_chain.upper().strip()) 2.14 2.15 def pdb_id_parse(ID): 2.16 - match = re1.search(ID, re.I) 2.17 + match = re1.search(ID) 2.18 if not match: 2.19 return None 2.20 - return match.groupdict() 2.21 - 2.22 + d = match.groupdict() 2.23 + if 'chain' not in d or not d['chain']: 2.24 + d['chain'] = ' ' 2.25 + if 'model' not in d or not d['model']: 2.26 + d['model'] = 0 2.27 + return d 2.28 2.29 #~ def std_id_parse(ID): 2.30 #~ """
3.1 --- a/lib/config.py Sun Oct 24 17:29:59 2010 +0400 3.2 +++ b/lib/config.py Sun Oct 24 18:12:21 2010 +0400 3.3 @@ -4,7 +4,8 @@ 3.4 maxabsent = 0.15 # deprecated? 3.5 3.6 # pdb download url (XXXX is pdb code place) 3.7 -pdb_url = 'http://www.pdb.org/pdb/files/XXXX.pdb' 3.8 +pdb_url = 'http://www.pdb.org/pdb/files/%s.pdb' 3.9 +pdb_dir = '/tmp/%s.pdb' 3.10 timeout = 10 # time in sec. for BRON-KERBOSH algorithm 3.11 3.12
4.1 --- a/lib/project.py Sun Oct 24 17:29:59 2010 +0400 4.2 +++ b/lib/project.py Sun Oct 24 18:12:21 2010 +0400 4.3 @@ -167,7 +167,7 @@ 4.4 uses old Monomers and Sequences objects 4.5 """ 4.6 tmp_file, tmp_filename = mkstemp() 4.7 - os.close(tmp_file) # this is file descriptor, not normal file object. security issue ) 4.8 + os.close(tmp_file) # this is file descriptor, not normal file object. 4.9 tmp_file = open(tmp_filename, 'w') 4.10 self.save_fasta(tmp_file) 4.11 tmp_file.close() 4.12 @@ -177,7 +177,8 @@ 4.13 try: 4.14 new_sequence = [i for i in sequences if sequence==i][0] 4.15 except: 4.16 - raise Exception("Align: Cann't find sequence %s in muscle output" % sequence.name) 4.17 + raise Exception("Align: Cann't find sequence %s in muscle output" % \ 4.18 + sequence.name) 4.19 old_monomers = iter(sequence.monomers) 4.20 self.alignment[sequence] = [] 4.21 for monomer in alignment[new_sequence]: 4.22 @@ -194,7 +195,7 @@ 4.23 returns list of columns of alignment 4.24 sequence or sequences: 4.25 if sequence is given, then column is (original_monomer, monomer) 4.26 - if sequences is given, then column is dict (original_monomer, {sequence: monomer}) 4.27 + if sequences is given, then column is (original_monomer, {sequence: monomer}) 4.28 if both of them are given, it is an error 4.29 original (Sequence type): 4.30 if given, this filters only columns represented by original sequence 4.31 @@ -210,11 +211,17 @@ 4.32 if sequence: 4.33 yield (column[indexes[original]], column[indexes[sequence]]) 4.34 else: 4.35 - yield (column[indexes[original]], dict([(s, column[indexes[s]]) for s in sequences])) 4.36 + yield (column[indexes[original]], 4.37 + dict([(s, column[indexes[s]]) for s in sequences])) 4.38 4.39 - def add_pdb(self, conformity_file=None): 4.40 - pass 4.41 + def pdb_auto_add(self, conformity_file=None): 4.42 + """ 4.43 + Adds pdb information to each sequence 4.44 4.45 + TODO: conformity_file 4.46 + """ 4.47 + conformity = {} 4.48 4.49 + for sequence in self.sequences: 4.50 + sequence.pdb_auto_add(conformity.get(sequence.name, None)) 4.51 4.52 -
5.1 --- a/lib/sequence.py Sun Oct 24 17:29:59 2010 +0400 5.2 +++ b/lib/sequence.py Sun Oct 24 18:12:21 2010 +0400 5.3 @@ -1,10 +1,12 @@ 5.4 #!/usr/bin/python 5.5 from monomer import AminoAcidType 5.6 from Bio.PDB import PDBParser, CaPPBuilder 5.7 -from allpy_pdb import std_id 5.8 +from allpy_pdb import std_id, pdb_id_parse 5.9 import project 5.10 import sys 5.11 - 5.12 +import config 5.13 +import os.path 5.14 +import urllib2 5.15 5.16 cappbuilder = CaPPBuilder() 5.17 5.18 @@ -76,11 +78,36 @@ 5.19 monomer.pdb_residue_add(chain, residue) 5.20 sequence.monomers.append(monomer) 5.21 return sequence 5.22 + 5.23 + def pdb_auto_add(self, conformity_info=None): 5.24 + """ 5.25 + Adds pdb information to each monomer 5.26 + Returns if information has been successfully added 5.27 5.28 + TODO: conformity_file 5.29 + """ 5.30 + if not conformity_info: 5.31 + match = pdb_id_parse(self.name) 5.32 + if match: 5.33 + code = match['code'] 5.34 + pdb_filename = config.pdb_dir % code 5.35 + if not os.path.exists(pdb_filename): 5.36 + url = config.pdb_url % code 5.37 + print "Download %s" % url 5.38 + pdb_file = open(pdb_filename, 'w') 5.39 + pdb_file.write(urllib2.urlopen(url).read()) 5.40 + pdb_file.close() 5.41 + print "Save %s" % pdb_filename 5.42 + pdb_file = open(pdb_filename) 5.43 + try: 5.44 + self.pdb_chain_add(pdb_file, code, match['chain'], match['model']) 5.45 + return True 5.46 + except: 5.47 + return False 5.48 + else: 5.49 + return False 5.50 5.51 5.52 5.53 5.54 5.55 - 5.56 -