allpy

changeset 358:1c06c34c4df2
Add generator of codes.py file (rels #16). Generator needs template and components.cif file (currently not provided)
author: boris <bnagaev@gmail.com>
date: Tue, 25 Jan 2011 15:53:42 +0300
parents: 0bdf8e55dd86
children: 5af3363aaca9 2785ca30cea5
files: allpy/data/codes_template.txt allpy/data/mkcodes.py
diffstat: 2 files changed, 113 insertions(+), 0 deletions(-) [+]
[-]

allpy/data/codes_template.txt 17

allpy/data/mkcodes.py 96 allpy/data/codes_template.txt 17 allpy/data/mkcodes.py 96
allpy/data/codes_template.txt 17
allpy/data/mkcodes.py 96
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/allpy/data/codes_template.txt	Tue Jan 25 15:53:42 2011 +0300
     1.3 @@ -0,0 +1,17 @@
     1.4 +"""Tables of monomer codes.
     1.5 +
     1.6 +`dna`, `rna`, `protein` are lists of all known codes for monomers of given
     1.7 +type. Each of them is a list of tuples of kind:
     1.8 +
     1.9 +    ( 1-letter code, is-modified?, 3-letter-code, fullname )
    1.10 +
    1.11 +`3-letter-code` is the code used in PDB (it may actually be one or
    1.12 +two letters)
    1.13 +
    1.14 +"""
    1.15 +
    1.16 +protein = %(protein)s
    1.17 +dna = %(dna)s
    1.18 +rna = %(rna)s
    1.19 +
    1.20 +# vim: set et ts=4 sts=4 sw=4:

     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/allpy/data/mkcodes.py	Tue Jan 25 15:53:42 2011 +0300
     2.3 @@ -0,0 +1,96 @@
     2.4 +import os
     2.5 +import argparse
     2.6 +from pprint import pformat
     2.7 +
     2.8 +def rel(*x):
     2.9 +    return os.path.join(os.path.abspath(os.path.dirname(__file__)), *x)
    2.10 +
    2.11 +p = argparse.ArgumentParser(
    2.12 +description='Components.cif to codes.py converter',
    2.13 +epilog='',
    2.14 +formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    2.15 +)
    2.16 +
    2.17 +r = argparse.FileType('r')
    2.18 +w = argparse.FileType('w')
    2.19 +
    2.20 +p.add_argument('-v','--version',action='version',version='%(prog)s 1.0')
    2.21 +p.add_argument('-i',help='input components.cif',metavar='FILE',type=r,
    2.22 +    required=True)
    2.23 +p.add_argument('-o',help='output codes.py',metavar='FILE',type=w,
    2.24 +    default=rel('codes.py'))
    2.25 +p.add_argument('-t',help='Template for codes.py',metavar='FILE',type=r,
    2.26 +    default=rel('codes_template.txt'))
    2.27 +
    2.28 +try:
    2.29 +    args = p.parse_args()
    2.30 +except Exception, t:
    2.31 +    print t
    2.32 +    exit()
    2.33 +
    2.34 +AAbank = {'ALA':'A', 'ARG':'R', 'ASN':'N', 'ASP':'D', 'CYS':'C',
    2.35 +        'GLN':'Q', 'GLU':'E', 'GLY':'G', 'HIS':'H', 'ILE':'I',
    2.36 +        'LEU':'L', 'LYS':'K', 'MET':'M', 'PHE':'F', 'PRO':'P',
    2.37 +        'SER':'S', 'THR':'T', 'TRP':'W', 'TYR':'Y', 'VAL':'V',
    2.38 +        'DA' :'A', 'DT' :'T', 'DG' :'G', 'DC' :'C',
    2.39 +        'A':  'A', 'U'  :'U', 'G'  :'G', 'C'  :'C'}
    2.40 +
    2.41 +protein = []
    2.42 +dna = []
    2.43 +rna = []
    2.44 +
    2.45 +cif_entry = {}
    2.46 +
    2.47 +for line in args.i:
    2.48 +    line = line.strip()
    2.49 +    if line == '#' and cif_entry:
    2.50 +        try:
    2.51 +            monomer_type = cif_entry['_chem_comp.type'].strip()
    2.52 +            if "PEPTIDE" in monomer_type:
    2.53 +                container = protein
    2.54 +            elif "DNA" in monomer_type:
    2.55 +                container = dna
    2.56 +            elif "RNA" in monomer_type:
    2.57 +                container = rna
    2.58 +            else:
    2.59 +                continue
    2.60 +            code1 = cif_entry['_chem_comp.one_letter_code'].strip().lower()
    2.61 +            if code1 == '?':
    2.62 +                parent = cif_entry['_chem_comp.mon_nstd_parent_comp_id']
    2.63 +                parent = parent.strip().upper()
    2.64 +                if parent in AAbank:
    2.65 +                    code1 = AAbank[parent].lower()
    2.66 +                else:
    2.67 +                    if container == protein:
    2.68 +                        code1 = 'x'
    2.69 +                    else:
    2.70 +                        code1 = 'n'
    2.71 +            code3 = cif_entry['_chem_comp.three_letter_code'].strip().upper()
    2.72 +            name = cif_entry['_chem_comp.name'].strip()
    2.73 +            if name[0] == '"' and name[-1] == '"':
    2.74 +                name = name[1:-1]
    2.75 +            modified = code3 not in AAbank
    2.76 +
    2.77 +            container.append((code1, modified, code3, name))
    2.78 +
    2.79 +            cif_entry = {} # for new entry
    2.80 +        except:
    2.81 +            pass
    2.82 +    else:
    2.83 +        key_value = line.split(' ', 1)
    2.84 +        if len(key_value) == 2:
    2.85 +            key, value = key_value
    2.86 +            cif_entry[key] = value
    2.87 +
    2.88 +protein.append(('X', False, "", "Undefined Aminoacid"))
    2.89 +dna.append(('N', False, "", "Undefined DNA Nucelotide"))
    2.90 +rna.append(('N', False, "", "Undefined RNA Nucelotide"))
    2.91 +
    2.92 +protein.sort()
    2.93 +dna.sort()
    2.94 +rna.sort()
    2.95 +
    2.96 +template = args.t.read()
    2.97 +args.o.write(template % {'protein': pformat(protein, width=1024),
    2.98 +    'dna': pformat(dna, width=1024),
    2.99 +    'rna': pformat(rna, width=1024)})