Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/126f99bf234d
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 01:02:53 2012
Кодировка:
allpy: 126f99bf234d

allpy

changeset 490:126f99bf234d

geometrical_core refactoring * move parameter validators to argparse_validators * move export to spt to spt.py module
author boris (netbook) <bnagaev@gmail.com>
date Mon, 21 Feb 2011 14:30:22 +0300
parents 84730a6e4a15
children 27e866560087
files geometrical_core/argparse_validators.py geometrical_core/geometrical-core geometrical_core/spt.py
diffstat 3 files changed, 140 insertions(+), 122 deletions(-) [+]
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/geometrical_core/argparse_validators.py	Mon Feb 21 14:30:22 2011 +0300
     1.3 @@ -0,0 +1,63 @@
     1.4 +
     1.5 +import argparse
     1.6 +
     1.7 +def f_nng(string):
     1.8 +    """ Validates nonnegative (>=0) float """
     1.9 +    try:
    1.10 +        value = float(string)
    1.11 +    except:
    1.12 +        msg = "%r is wrong float" % string
    1.13 +        raise argparse.ArgumentTypeError(msg)
    1.14 +    if value < 0:
    1.15 +        msg = "%r is negative" % string
    1.16 +        raise argparse.ArgumentTypeError(msg)
    1.17 +    return value
    1.18 +
    1.19 +def part(string):
    1.20 +    """ Validates 0.0 <= float <= 1.0 """
    1.21 +    try:
    1.22 +        value = float(string)
    1.23 +    except:
    1.24 +        msg = "%r is wrong float" % string
    1.25 +        raise argparse.ArgumentTypeError(msg)
    1.26 +    if not (0.0 <= value <= 1.0):
    1.27 +        msg = "%r is not float, representing part, ie in [0, 1]" % string
    1.28 +        raise argparse.ArgumentTypeError(msg)
    1.29 +    return value
    1.30 +
    1.31 +def timeout(string):
    1.32 +    """ Validates int >= -1 """
    1.33 +    try:
    1.34 +        value = int(string)
    1.35 +    except:
    1.36 +        msg = "%r is wrong integer" % string
    1.37 +        raise argparse.ArgumentTypeError(msg)
    1.38 +    if value < -1:
    1.39 +        msg = "integer %r is less than -1" % string
    1.40 +        raise argparse.ArgumentTypeError(msg)
    1.41 +    return value
    1.42 +
    1.43 +def pos(string):
    1.44 +    """ Validates positive integer """
    1.45 +    try:
    1.46 +        value = int(string)
    1.47 +    except:
    1.48 +        msg = "%r is wrong integer" % string
    1.49 +        raise argparse.ArgumentTypeError(msg)
    1.50 +    if value < 1:
    1.51 +        msg = "%r is not positive integer" % string
    1.52 +        raise argparse.ArgumentTypeError(msg)
    1.53 +    return value
    1.54 +
    1.55 +def i_nng(string):
    1.56 +    """ Validates int >= 0 """
    1.57 +    try:
    1.58 +        value = int(string)
    1.59 +    except:
    1.60 +        msg = "%r is wrong integer" % string
    1.61 +        raise argparse.ArgumentTypeError(msg)
    1.62 +    if value < 0:
    1.63 +        msg = "integer %r is less than 0" % string
    1.64 +        raise argparse.ArgumentTypeError(msg)
    1.65 +    return value
    1.66 +
     2.1 --- a/geometrical_core/geometrical-core	Mon Feb 21 00:02:27 2011 +0300
     2.2 +++ b/geometrical_core/geometrical-core	Mon Feb 21 14:30:22 2011 +0300
     2.3 @@ -6,108 +6,54 @@
     2.4  
     2.5  import argparse
     2.6  import os
     2.7 -from tempfile import NamedTemporaryFile
     2.8  
     2.9  from allpy import config
    2.10  from protein_pdb import Alignment, Block, Monomer, Sequence
    2.11  from allpy import markup
    2.12  from allpy import fileio
    2.13 +from spt import gcs_to_spt
    2.14 +from argparse_validators import f_nng, part, timeout, pos, i_nng
    2.15  
    2.16  r = argparse.FileType('r')
    2.17  w = argparse.FileType('w')
    2.18  c = config
    2.19  
    2.20 -def f_nng(string):
    2.21 -    """ Validates nonnegative (>=0) float """
    2.22 -    try:
    2.23 -        value = float(string)
    2.24 -    except:
    2.25 -        msg = "%r is wrong float" % string
    2.26 -        raise argparse.ArgumentTypeError(msg)
    2.27 -    if value < 0:
    2.28 -        msg = "%r is negative" % string
    2.29 -        raise argparse.ArgumentTypeError(msg)
    2.30 -    return value
    2.31 -
    2.32 -def part(string):
    2.33 -    """ Validates 0.0 <= float <= 1.0 """
    2.34 -    try:
    2.35 -        value = float(string)
    2.36 -    except:
    2.37 -        msg = "%r is wrong float" % string
    2.38 -        raise argparse.ArgumentTypeError(msg)
    2.39 -    if not (0.0 <= value <= 1.0):
    2.40 -        msg = "%r is not float, representing part, ie in [0, 1]" % string
    2.41 -        raise argparse.ArgumentTypeError(msg)
    2.42 -    return value
    2.43 -
    2.44 -def timeout(string):
    2.45 -    """ Validates int >= -1 """
    2.46 -    try:
    2.47 -        value = int(string)
    2.48 -    except:
    2.49 -        msg = "%r is wrong integer" % string
    2.50 -        raise argparse.ArgumentTypeError(msg)
    2.51 -    if value < -1:
    2.52 -        msg = "integer %r is less than -1" % string
    2.53 -        raise argparse.ArgumentTypeError(msg)
    2.54 -    return value
    2.55 -
    2.56 -def pos(string):
    2.57 -    """ Validates positive integer """
    2.58 -    try:
    2.59 -        value = int(string)
    2.60 -    except:
    2.61 -        msg = "%r is wrong integer" % string
    2.62 -        raise argparse.ArgumentTypeError(msg)
    2.63 -    if value < 1:
    2.64 -        msg = "%r is not positive integer" % string
    2.65 -        raise argparse.ArgumentTypeError(msg)
    2.66 -    return value
    2.67 -
    2.68 -def i_nng(string):
    2.69 -    """ Validates int >= 0 """
    2.70 -    try:
    2.71 -        value = int(string)
    2.72 -    except:
    2.73 -        msg = "%r is wrong integer" % string
    2.74 -        raise argparse.ArgumentTypeError(msg)
    2.75 -    if value < 0:
    2.76 -        msg = "integer %r is less than 0" % string
    2.77 -        raise argparse.ArgumentTypeError(msg)
    2.78 -    return value
    2.79 -
    2.80  p = argparse.ArgumentParser(
    2.81  description='Geometrical Core building tool.',
    2.82  epilog='''1) Distance spreading [angstrom]
    2.83  2) -1 timeout means running Bron-Kerbosh algorithm without timeout
    2.84  3) Alternative core new aa part: read documentation for more information
    2.85 -4) Superposition core identifier: main core is 0, first alternative is 1 etc. ''',
    2.86 +4) Superposition core: main core is 0, first alternative is 1 etc. ''',
    2.87  formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    2.88 -#~ argument_default=argparse.SUPPRESS,
    2.89  )
    2.90  
    2.91  p.add_argument('-v','--version',action='version',version='%(prog)s 2.0')
    2.92 -p.add_argument('-i',help='Input alignment file',metavar='FILE',type=r,required=True)
    2.93 -p.add_argument('-c',help='PDB names conformity file',metavar='FILE',type=r)
    2.94 +p.add_argument('-i',help='Input alignment file',
    2.95 +    metavar='FILE',type=r,required=True)
    2.96 +p.add_argument('-c',help='PDB names conformity file',
    2.97 +    metavar='FILE',type=r)
    2.98  p.add_argument('-l',help='Output list file',metavar='FILE',type=w)
    2.99  p.add_argument('-f',help='Output fasta file',metavar='FILE',type=w)
   2.100  p.add_argument('-g',help='Output msf file',metavar='FILE',type=w)
   2.101  p.add_argument('-p',help='Output pdb file',metavar='FILE',type=w)
   2.102  p.add_argument('-s',help='Output spt file',metavar='FILE',type=w)
   2.103 -p.add_argument('-d',help='Distance spreading',metavar='float',type=f_nng,default=c.delta)
   2.104 -p.add_argument('-m',help='Min core size',metavar='int',type=pos,default=c.minsize)
   2.105 -p.add_argument('-t',help='Bron-Kerbosh algorithm timeout',metavar='int',type=timeout,default=c.timeout)
   2.106 -p.add_argument('-n',help='Alternative core new aa part',metavar='float',type=part,default=c.ac_new_atoms)
   2.107 -p.add_argument('-a',help='Cores count',metavar='int',type=i_nng,default=c.ac_count)
   2.108 -p.add_argument('-x',help='Superposition core identifier',metavar='int',type=i_nng,default=0)
   2.109 +p.add_argument('-d',help='Distance spreading',
   2.110 +    metavar='float',type=f_nng,default=c.delta)
   2.111 +p.add_argument('-m',help='Min core size',
   2.112 +    metavar='int',type=pos,default=c.minsize)
   2.113 +p.add_argument('-t',help='Bron-Kerbosh algorithm timeout',
   2.114 +    metavar='int',type=timeout,default=c.timeout)
   2.115 +p.add_argument('-n',help='Alternative core new aa part',
   2.116 +    metavar='float',type=part,default=c.ac_new_atoms)
   2.117 +p.add_argument('-a',help='Cores count',
   2.118 +    metavar='int',type=i_nng,default=c.ac_count)
   2.119 +p.add_argument('-x',help='Superposition core identifier',
   2.120 +    metavar='int',type=i_nng,default=0)
   2.121  
   2.122 -tmp_file = None
   2.123 -
   2.124 -#try:
   2.125  args = p.parse_args()
   2.126  
   2.127 -if not args.l and not args.f and not args.g and not args.p and not args.s:
   2.128 +if not args.l and not args.f and not args.g\
   2.129 +    and not args.p and not args.s:
   2.130      print 'Error: no output file provided'
   2.131      exit()
   2.132  if not (args.p and args.s) and not (not args.p and not args.s):
   2.133 @@ -176,51 +122,5 @@
   2.134      pdb_map = block.save_pdb(args.p)
   2.135  
   2.136      if args.s:
   2.137 -        s = args.s # spt
   2.138 -        temp_i = 0
   2.139 -        for sequence in alignment.sequences:
   2.140 -            chain, model = pdb_map[sequence]
   2.141 -            s.write("define p_%(name)s *:%(chain)s/%(model)d\n" %\
   2.142 -                {'name':sequence.name, 'chain':chain, 'model':model})
   2.143 -        cores = []
   2.144 -        for j, gc in enumerate(GCs):
   2.145 -            j = j or ''
   2.146 -            core_parts = []
   2.147 -            for sequence in alignment.sequences:
   2.148 -                core_parts.append("core%(j)s_%(name)s"%\
   2.149 -                    {'name': sequence.name, 'j': j})
   2.150 -                gc_n = []
   2.151 -                for column in gc:
   2.152 -                    monomer = column[sequence]
   2.153 -                    residue = monomer.pdb_residue
   2.154 -                    hetero_flag, n, insertion = residue.id
   2.155 -                    gc_n.append(n)
   2.156 -                gc_n.sort()
   2.157 -                batches = []
   2.158 -                while gc_n:
   2.159 -                    batch = '(%s)' % (','.join(str(i) for i in gc_n[:50]))
   2.160 -                    batches.append(batch)
   2.161 -                    gc_n = gc_n[50:]
   2.162 -                if len(batches) == 1:
   2.163 -                    s.write(
   2.164 -                    "define core%(j)s_%(name)s %(batch)s and p_%(name)s\n"%\
   2.165 -                        {'name': sequence.name, 'batch': batch, 'j': j})
   2.166 -                else:
   2.167 -                    temps = []
   2.168 -                    for batch in batches:
   2.169 -                        s.write(
   2.170 -                        "define temp_%(i)d %(batch)s and p_%(name)s\n" %\
   2.171 -                            {'name': sequence.name, 'batch': batch, 'i': i})
   2.172 -                        temps.append("temp_%(i)d" % {'i': i})
   2.173 -                    temps_str = '(%s)' % (','.join(temps))
   2.174 -                    s.write(
   2.175 -                        "define core%(j)s_%(name)s %(temps_str)s\n" % {'j':j,
   2.176 -                        'name': sequence.name, 'temps_str': temps_str})
   2.177 -            s.write("define core%(j)s %(core_parts)s\n" % {'j':j,
   2.178 -                'core_parts': ','.join(core_parts)})
   2.179 -            cores.append("core%(j)s" % {'j': j})
   2.180 -        s.write("define cores %(cores)s\n" % {'cores': ','.join(cores)})
   2.181 +        gcs_to_spt(args.s, GCs, alignment, pdb_map)
   2.182  
   2.183 -if tmp_file:
   2.184 -    os.unlink(tmp_file.name)
   2.185 -
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/geometrical_core/spt.py	Mon Feb 21 14:30:22 2011 +0300
     3.3 @@ -0,0 +1,55 @@
     3.4 +
     3.5 +def gcs_to_spt(spt, GCs, alignment, pdb_map):
     3.6 +    """ Save information about geometrical cores to spt
     3.7 +
     3.8 +     * spt - file object for write
     3.9 +     * GCs - list of geometrical cores.
    3.10 +        Geometrical core is a collection of Column's
    3.11 +     * alignment
    3.12 +     * pdb_map - {sequence: (chain, model)}
    3.13 +        char chain
    3.14 +        int model
    3.15 +    """
    3.16 +    temp_i = 0
    3.17 +    for sequence in alignment.sequences:
    3.18 +        chain, model = pdb_map[sequence]
    3.19 +        spt.write("define p_%(name)s *:%(chain)s/%(model)d\n" %\
    3.20 +            {'name':sequence.name, 'chain':chain, 'model':model})
    3.21 +    cores = []
    3.22 +    for j, gc in enumerate(GCs):
    3.23 +        j = j or ''
    3.24 +        core_parts = []
    3.25 +        for sequence in alignment.sequences:
    3.26 +            core_parts.append("core%(j)s_%(name)s"%\
    3.27 +                {'name': sequence.name, 'j': j})
    3.28 +            gc_n = []
    3.29 +            for column in gc:
    3.30 +                monomer = column[sequence]
    3.31 +                residue = monomer.pdb_residue
    3.32 +                hetero_flag, n, insertion = residue.id
    3.33 +                gc_n.append(n)
    3.34 +            gc_n.sort()
    3.35 +            batches = []
    3.36 +            while gc_n:
    3.37 +                batch = '(%s)'%(','.join(str(i) for i in gc_n[:50]))
    3.38 +                batches.append(batch)
    3.39 +                gc_n = gc_n[50:]
    3.40 +            if len(batches) == 1:
    3.41 +                spt.write(
    3.42 +                "define core%(j)s_%(name)s %(batch)s and p_%(name)s\n"%\
    3.43 +                    {'name': sequence.name, 'batch': batch, 'j': j})
    3.44 +            else:
    3.45 +                temps = []
    3.46 +                for batch in batches:
    3.47 +                    spt.write(
    3.48 +                    "define temp_%(i)d %(batch)s and p_%(name)s\n"\
    3.49 +                    %{'name': sequence.name, 'batch': batch, 'i': i})
    3.50 +                    temps.append("temp_%(i)d" % {'i': i})
    3.51 +                temps_str = '(%s)' % (','.join(temps))
    3.52 +                spt.write(
    3.53 +                "define core%(j)s_%(name)s %(temps_str)s\n" % {'j':j,
    3.54 +                'name': sequence.name, 'temps_str': temps_str})
    3.55 +        spt.write("define core%(j)s %(core_parts)s\n" % {'j':j,
    3.56 +            'core_parts': ','.join(core_parts)})
    3.57 +        cores.append("core%(j)s" % {'j': j})
    3.58 +    spt.write("define cores %(cores)s\n" % {'cores': ','.join(cores)})