allpy
changeset 358:1c06c34c4df2
Add generator of codes.py file (rels #16)
Generator needs a template and the components.cif file (currently not provided)
author | boris <bnagaev@gmail.com> |
---|---|
date | Tue, 25 Jan 2011 15:53:42 +0300 |
parents | 0bdf8e55dd86 |
children | 5af3363aaca9 2785ca30cea5 |
files | allpy/data/codes_template.txt allpy/data/mkcodes.py |
diffstat | 2 files changed, 113 insertions(+), 0 deletions(-) [+] |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/allpy/data/codes_template.txt Tue Jan 25 15:53:42 2011 +0300 1.3 @@ -0,0 +1,17 @@ 1.4 +"""Tables of monomer codes. 1.5 + 1.6 +`dna`, `rna`, `protein` are lists of all known codes for monomers of given 1.7 +type. Each of them is a list of tuples of kind: 1.8 + 1.9 + ( 1-letter code, is-modified?, 3-letter-code, fullname ) 1.10 + 1.11 +`3-letter-code` is the code used in PDB (it may actually be one or 1.12 +two letters) 1.13 + 1.14 +""" 1.15 + 1.16 +protein = %(protein)s 1.17 +dna = %(dna)s 1.18 +rna = %(rna)s 1.19 + 1.20 +# vim: set et ts=4 sts=4 sw=4:
# allpy/data/mkcodes.py -- new file introduced by this changeset.
"""Generate ``codes.py`` from the PDB ``components.cif`` dictionary.

The script scans ``components.cif`` entry by entry, keeps the monomers whose
``_chem_comp.type`` mentions PEPTIDE, DNA or RNA, and fills the
``%(protein)s``, ``%(dna)s`` and ``%(rna)s`` placeholders of a template with
the resulting tables.  Every table row is a tuple::

    (1-letter code, is-modified?, 3-letter code, full name)

Usage::

    python mkcodes.py -i components.cif [-o codes.py] [-t codes_template.txt]
"""
import argparse
import os
import sys
from pprint import pformat


def rel(*x):
    """Return the path components *x* joined onto this script's directory."""
    return os.path.join(os.path.abspath(os.path.dirname(__file__)), *x)


# Unmodified monomers: PDB component id -> upper-case 1-letter code.
AAbank = {'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C',
          'GLN': 'Q', 'GLU': 'E', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I',
          'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P',
          'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V',
          'DA': 'A', 'DT': 'T', 'DG': 'G', 'DC': 'C',
          'A': 'A', 'U': 'U', 'G': 'G', 'C': 'C'}

# Substring looked for in `_chem_comp.type` -> table name.  Order matters:
# PEPTIDE is tested first, then DNA, then RNA.
_POLYMER_KINDS = (('PEPTIDE', 'protein'), ('DNA', 'dna'), ('RNA', 'rna'))

# 1-letter code used when neither the entry nor its parent provides one.
_UNKNOWN_CODE = {'protein': 'x', 'dna': 'n', 'rna': 'n'}


def _monomer_record(cif_entry):
    """Turn one collected CIF entry into ``(kind, row)``.

    Returns ``None`` when the entry is not a protein/DNA/RNA monomer or when
    one of the required ``_chem_comp.*`` fields is absent (for example because
    its value was written on the following lines and therefore never made it
    into *cif_entry*).
    """
    try:
        monomer_type = cif_entry['_chem_comp.type'].strip()
        for marker, kind in _POLYMER_KINDS:
            if marker in monomer_type:
                break
        else:
            return None

        code1 = cif_entry['_chem_comp.one_letter_code'].strip().lower()
        if code1 == '?':
            # No own code: inherit the parent's code, else use the
            # "unknown" letter of this monomer kind.
            parent = cif_entry['_chem_comp.mon_nstd_parent_comp_id']
            parent = parent.strip().upper()
            if parent in AAbank:
                code1 = AAbank[parent].lower()
            else:
                code1 = _UNKNOWN_CODE[kind]

        code3 = cif_entry['_chem_comp.three_letter_code'].strip().upper()
        name = cif_entry['_chem_comp.name'].strip()
    except KeyError:
        return None

    # Drop the double quotes CIF puts around values containing spaces.
    if name[:1] == '"' and name[-1:] == '"':
        name = name[1:-1]
    modified = code3 not in AAbank
    return kind, (code1, modified, code3, name)


def parse_components(lines):
    """Collect monomer rows from the lines of a ``components.cif`` file.

    *lines* is any iterable of text lines.  A line consisting only of ``#``
    closes the entry collected so far; any other line containing a space is
    stored as ``key -> rest of line``.

    Returns a ``(protein, dna, rna)`` tuple of lists of
    ``(code1, modified, code3, name)`` rows, in file order.
    """
    tables = {'protein': [], 'dna': [], 'rna': []}
    cif_entry = {}
    for line in lines:
        line = line.strip()
        if line == '#':
            if cif_entry:
                record = _monomer_record(cif_entry)
                if record is not None:
                    kind, row = record
                    tables[kind].append(row)
                # Always start afresh, even when the entry was skipped, so
                # that its fields cannot leak into the next entry.
                cif_entry = {}
            continue
        key_value = line.split(' ', 1)
        if len(key_value) == 2:
            key, value = key_value
            cif_entry[key] = value
    return tables['protein'], tables['dna'], tables['rna']


def _build_parser():
    """Create the command-line parser (options -i, -o, -t, -v)."""
    parser = argparse.ArgumentParser(
        description='Components.cif to codes.py converter',
        epilog='',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    readable = argparse.FileType('r')
    writable = argparse.FileType('w')
    parser.add_argument('-v', '--version', action='version',
                        version='%(prog)s 1.0')
    parser.add_argument('-i', help='input components.cif', metavar='FILE',
                        type=readable, required=True)
    parser.add_argument('-o', help='output codes.py', metavar='FILE',
                        type=writable, default=rel('codes.py'))
    parser.add_argument('-t', help='Template for codes.py', metavar='FILE',
                        type=readable, default=rel('codes_template.txt'))
    return parser


def main(argv=None):
    """Run the converter; *argv* defaults to ``sys.argv[1:]``."""
    parser = _build_parser()
    try:
        args = parser.parse_args(argv)
    except (IOError, OSError) as err:
        # Older argparse versions let file-open errors escape FileType.
        sys.exit(str(err))

    try:
        protein, dna, rna = parse_components(args.i)

        # NOTE(review): "Nucelotide" looks like a typo for "Nucleotide"; it is
        # kept unchanged because the string ends up in the generated codes.py.
        protein.append(('X', False, "", "Undefined Aminoacid"))
        dna.append(('N', False, "", "Undefined DNA Nucelotide"))
        rna.append(('N', False, "", "Undefined RNA Nucelotide"))

        protein.sort()
        dna.sort()
        rna.sort()

        template = args.t.read()
        args.o.write(template % {'protein': pformat(protein, width=1024),
                                 'dna': pformat(dna, width=1024),
                                 'rna': pformat(rna, width=1024)})
    finally:
        for handle in (args.i, args.t, args.o):
            # '-' maps to the standard streams, which must stay open.
            if handle not in (sys.stdin, sys.stdout):
                handle.close()


if __name__ == '__main__':
    main()