allpy
changeset 490:126f99bf234d
geometrical_core refactoring
* move parameter validators to argparse_validators
* move the export-to-spt code into the spt.py module
author | boris (netbook) <bnagaev@gmail.com> |
---|---|
date | Mon, 21 Feb 2011 14:30:22 +0300 |
parents | 84730a6e4a15 |
children | 27e866560087 |
files | geometrical_core/argparse_validators.py geometrical_core/geometrical-core geometrical_core/spt.py |
diffstat | 3 files changed, 140 insertions(+), 122 deletions(-) [+] |
line diff
# geometrical_core/argparse_validators.py
# (added as a new file in this changeset; the validators were moved here
# from the geometrical-core script)
"""Validators intended for use as ``type=`` callables of argparse arguments.

Each validator takes the raw command-line string, returns the converted
value, and raises ``argparse.ArgumentTypeError`` with a short message when
the string cannot be converted or the value is out of range.
"""

import argparse
import math


def _convert(string, cast, kind):
    """Return ``cast(string)``; report a failed conversion as a bad argument.

    * string - raw argument text
    * cast - conversion callable (``float`` or ``int``)
    * kind - word used in the error message ("float" or "integer")
    """
    try:
        return cast(string)
    except (TypeError, ValueError):
        # Only conversion failures are reported as bad arguments; a bare
        # ``except`` would also swallow KeyboardInterrupt and SystemExit.
        raise argparse.ArgumentTypeError("%r is wrong %s" % (string, kind))


def f_nng(string):
    """ Validates nonnegative (>=0) float """
    value = _convert(string, float, "float")
    if math.isnan(value):
        # NaN is not >= 0, but ``nan < 0`` is False, so it has to be
        # rejected explicitly.
        raise argparse.ArgumentTypeError("%r is wrong float" % string)
    if value < 0:
        raise argparse.ArgumentTypeError("%r is negative" % string)
    return value


def part(string):
    """ Validates 0.0 <= float <= 1.0 """
    value = _convert(string, float, "float")
    # The chained comparison is False for NaN, so NaN is rejected here too.
    if not (0.0 <= value <= 1.0):
        msg = "%r is not float, representing part, ie in [0, 1]" % string
        raise argparse.ArgumentTypeError(msg)
    return value


def timeout(string):
    """ Validates int >= -1 """
    value = _convert(string, int, "integer")
    if value < -1:
        msg = "integer %r is less than -1" % string
        raise argparse.ArgumentTypeError(msg)
    return value


def pos(string):
    """ Validates positive integer """
    value = _convert(string, int, "integer")
    if value < 1:
        msg = "%r is not positive integer" % string
        raise argparse.ArgumentTypeError(msg)
    return value


def i_nng(string):
    """ Validates int >= 0 """
    value = _convert(string, int, "integer")
    if value < 0:
        msg = "integer %r is less than 0" % string
        raise argparse.ArgumentTypeError(msg)
    return value
2.1 --- a/geometrical_core/geometrical-core Mon Feb 21 00:02:27 2011 +0300 2.2 +++ b/geometrical_core/geometrical-core Mon Feb 21 14:30:22 2011 +0300 2.3 @@ -6,108 +6,54 @@ 2.4 2.5 import argparse 2.6 import os 2.7 -from tempfile import NamedTemporaryFile 2.8 2.9 from allpy import config 2.10 from protein_pdb import Alignment, Block, Monomer, Sequence 2.11 from allpy import markup 2.12 from allpy import fileio 2.13 +from spt import gcs_to_spt 2.14 +from argparse_validators import f_nng, part, timeout, pos, i_nng 2.15 2.16 r = argparse.FileType('r') 2.17 w = argparse.FileType('w') 2.18 c = config 2.19 2.20 -def f_nng(string): 2.21 - """ Validates nonnegative (>=0) float """ 2.22 - try: 2.23 - value = float(string) 2.24 - except: 2.25 - msg = "%r is wrong float" % string 2.26 - raise argparse.ArgumentTypeError(msg) 2.27 - if value < 0: 2.28 - msg = "%r is negative" % string 2.29 - raise argparse.ArgumentTypeError(msg) 2.30 - return value 2.31 - 2.32 -def part(string): 2.33 - """ Validates 0.0 <= float <= 1.0 """ 2.34 - try: 2.35 - value = float(string) 2.36 - except: 2.37 - msg = "%r is wrong float" % string 2.38 - raise argparse.ArgumentTypeError(msg) 2.39 - if not (0.0 <= value <= 1.0): 2.40 - msg = "%r is not float, representing part, ie in [0, 1]" % string 2.41 - raise argparse.ArgumentTypeError(msg) 2.42 - return value 2.43 - 2.44 -def timeout(string): 2.45 - """ Validates int >= -1 """ 2.46 - try: 2.47 - value = int(string) 2.48 - except: 2.49 - msg = "%r is wrong integer" % string 2.50 - raise argparse.ArgumentTypeError(msg) 2.51 - if value < -1: 2.52 - msg = "integer %r is less than -1" % string 2.53 - raise argparse.ArgumentTypeError(msg) 2.54 - return value 2.55 - 2.56 -def pos(string): 2.57 - """ Validates positive integer """ 2.58 - try: 2.59 - value = int(string) 2.60 - except: 2.61 - msg = "%r is wrong integer" % string 2.62 - raise argparse.ArgumentTypeError(msg) 2.63 - if value < 1: 2.64 - msg = "%r is not positive integer" % string 2.65 - raise 
argparse.ArgumentTypeError(msg) 2.66 - return value 2.67 - 2.68 -def i_nng(string): 2.69 - """ Validates int >= 0 """ 2.70 - try: 2.71 - value = int(string) 2.72 - except: 2.73 - msg = "%r is wrong integer" % string 2.74 - raise argparse.ArgumentTypeError(msg) 2.75 - if value < 0: 2.76 - msg = "integer %r is less than 0" % string 2.77 - raise argparse.ArgumentTypeError(msg) 2.78 - return value 2.79 - 2.80 p = argparse.ArgumentParser( 2.81 description='Geometrical Core building tool.', 2.82 epilog='''1) Distance spreading [angstrom] 2.83 2) -1 timeout means running Bron-Kerbosh algorithm without timeout 2.84 3) Alternative core new aa part: read documentation for more information 2.85 -4) Superposition core identifier: main core is 0, first alternative is 1 etc. ''', 2.86 +4) Superposition core: main core is 0, first alternative is 1 etc. ''', 2.87 formatter_class=argparse.ArgumentDefaultsHelpFormatter, 2.88 -#~ argument_default=argparse.SUPPRESS, 2.89 ) 2.90 2.91 p.add_argument('-v','--version',action='version',version='%(prog)s 2.0') 2.92 -p.add_argument('-i',help='Input alignment file',metavar='FILE',type=r,required=True) 2.93 -p.add_argument('-c',help='PDB names conformity file',metavar='FILE',type=r) 2.94 +p.add_argument('-i',help='Input alignment file', 2.95 + metavar='FILE',type=r,required=True) 2.96 +p.add_argument('-c',help='PDB names conformity file', 2.97 + metavar='FILE',type=r) 2.98 p.add_argument('-l',help='Output list file',metavar='FILE',type=w) 2.99 p.add_argument('-f',help='Output fasta file',metavar='FILE',type=w) 2.100 p.add_argument('-g',help='Output msf file',metavar='FILE',type=w) 2.101 p.add_argument('-p',help='Output pdb file',metavar='FILE',type=w) 2.102 p.add_argument('-s',help='Output spt file',metavar='FILE',type=w) 2.103 -p.add_argument('-d',help='Distance spreading',metavar='float',type=f_nng,default=c.delta) 2.104 -p.add_argument('-m',help='Min core size',metavar='int',type=pos,default=c.minsize) 2.105 
-p.add_argument('-t',help='Bron-Kerbosh algorithm timeout',metavar='int',type=timeout,default=c.timeout) 2.106 -p.add_argument('-n',help='Alternative core new aa part',metavar='float',type=part,default=c.ac_new_atoms) 2.107 -p.add_argument('-a',help='Cores count',metavar='int',type=i_nng,default=c.ac_count) 2.108 -p.add_argument('-x',help='Superposition core identifier',metavar='int',type=i_nng,default=0) 2.109 +p.add_argument('-d',help='Distance spreading', 2.110 + metavar='float',type=f_nng,default=c.delta) 2.111 +p.add_argument('-m',help='Min core size', 2.112 + metavar='int',type=pos,default=c.minsize) 2.113 +p.add_argument('-t',help='Bron-Kerbosh algorithm timeout', 2.114 + metavar='int',type=timeout,default=c.timeout) 2.115 +p.add_argument('-n',help='Alternative core new aa part', 2.116 + metavar='float',type=part,default=c.ac_new_atoms) 2.117 +p.add_argument('-a',help='Cores count', 2.118 + metavar='int',type=i_nng,default=c.ac_count) 2.119 +p.add_argument('-x',help='Superposition core identifier', 2.120 + metavar='int',type=i_nng,default=0) 2.121 2.122 -tmp_file = None 2.123 - 2.124 -#try: 2.125 args = p.parse_args() 2.126 2.127 -if not args.l and not args.f and not args.g and not args.p and not args.s: 2.128 +if not args.l and not args.f and not args.g\ 2.129 + and not args.p and not args.s: 2.130 print 'Error: no output file provided' 2.131 exit() 2.132 if not (args.p and args.s) and not (not args.p and not args.s): 2.133 @@ -176,51 +122,5 @@ 2.134 pdb_map = block.save_pdb(args.p) 2.135 2.136 if args.s: 2.137 - s = args.s # spt 2.138 - temp_i = 0 2.139 - for sequence in alignment.sequences: 2.140 - chain, model = pdb_map[sequence] 2.141 - s.write("define p_%(name)s *:%(chain)s/%(model)d\n" %\ 2.142 - {'name':sequence.name, 'chain':chain, 'model':model}) 2.143 - cores = [] 2.144 - for j, gc in enumerate(GCs): 2.145 - j = j or '' 2.146 - core_parts = [] 2.147 - for sequence in alignment.sequences: 2.148 - core_parts.append("core%(j)s_%(name)s"%\ 2.149 - 
{'name': sequence.name, 'j': j}) 2.150 - gc_n = [] 2.151 - for column in gc: 2.152 - monomer = column[sequence] 2.153 - residue = monomer.pdb_residue 2.154 - hetero_flag, n, insertion = residue.id 2.155 - gc_n.append(n) 2.156 - gc_n.sort() 2.157 - batches = [] 2.158 - while gc_n: 2.159 - batch = '(%s)' % (','.join(str(i) for i in gc_n[:50])) 2.160 - batches.append(batch) 2.161 - gc_n = gc_n[50:] 2.162 - if len(batches) == 1: 2.163 - s.write( 2.164 - "define core%(j)s_%(name)s %(batch)s and p_%(name)s\n"%\ 2.165 - {'name': sequence.name, 'batch': batch, 'j': j}) 2.166 - else: 2.167 - temps = [] 2.168 - for batch in batches: 2.169 - s.write( 2.170 - "define temp_%(i)d %(batch)s and p_%(name)s\n" %\ 2.171 - {'name': sequence.name, 'batch': batch, 'i': i}) 2.172 - temps.append("temp_%(i)d" % {'i': i}) 2.173 - temps_str = '(%s)' % (','.join(temps)) 2.174 - s.write( 2.175 - "define core%(j)s_%(name)s %(temps_str)s\n" % {'j':j, 2.176 - 'name': sequence.name, 'temps_str': temps_str}) 2.177 - s.write("define core%(j)s %(core_parts)s\n" % {'j':j, 2.178 - 'core_parts': ','.join(core_parts)}) 2.179 - cores.append("core%(j)s" % {'j': j}) 2.180 - s.write("define cores %(cores)s\n" % {'cores': ','.join(cores)}) 2.181 + gcs_to_spt(args.s, GCs, alignment, pdb_map) 2.182 2.183 -if tmp_file: 2.184 - os.unlink(tmp_file.name) 2.185 -
# geometrical_core/spt.py
# (added as a new file in this changeset; the spt export code was moved
# here from the geometrical-core script)


def _spt_batches(numbers, size):
    """Return '(n1,n2,...)' strings, each listing at most size sorted numbers."""
    ordered = sorted(numbers)
    return ['(%s)' % ','.join(str(n) for n in ordered[start:start + size])
            for start in range(0, len(ordered), size)]


def gcs_to_spt(spt, GCs, alignment, pdb_map, batch_size=50):
    """ Save information about geometrical cores to spt

    * spt - file object for write
    * GCs - list of geometrical cores.
        Geometrical core is a collection of Column's; ``column[sequence]``
        must give a monomer whose ``pdb_residue.id`` is a
        ``(hetero_flag, number, insertion)`` triple
    * alignment - object with a ``sequences`` attribute; every sequence
        has a ``name``
    * pdb_map - {sequence: (chain, model)}
        char chain
        int model
    * batch_size - maximum count of residue numbers in one parenthesised
        list (default 50, the value that used to be hard-coded;
        presumably keeps script lines short -- TODO confirm)

    Written ``define`` statements:

    * ``p_<name>`` - chain and model of every sequence
    * ``core<j>_<name>`` - residues of core j in sequence <name>
    * ``core<j>`` - union of the per-sequence parts of core j
    * ``cores`` - all cores

    <j> is empty for the first (main) core and 1, 2, ... for the others.
    When the residues of one sequence need more than one batch, each
    batch is written as ``temp_<i>`` first and the core part is defined
    as the list of those temps.

    Raises ValueError if batch_size is less than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be positive")

    for sequence in alignment.sequences:
        chain, model = pdb_map[sequence]
        spt.write("define p_%(name)s *:%(chain)s/%(model)d\n" %
                  {'name': sequence.name, 'chain': chain, 'model': model})

    # Running number for temp_<i> definitions.  It is shared by all
    # sequences and cores, so a temp name is never defined twice.  (The
    # previous code formatted an unbound ``i`` here and raised NameError
    # as soon as a second batch was needed.)
    temp_i = 0
    cores = []
    for j, gc in enumerate(GCs):
        j = j or ''  # main core has no numeric suffix
        core_parts = []
        for sequence in alignment.sequences:
            name = sequence.name
            core_part = "core%s_%s" % (j, name)
            core_parts.append(core_part)

            numbers = []
            for column in gc:
                hetero_flag, n, insertion = column[sequence].pdb_residue.id
                numbers.append(n)
            batches = _spt_batches(numbers, batch_size)

            if len(batches) == 1:
                spt.write("define %s %s and p_%s\n" %
                          (core_part, batches[0], name))
            else:
                # Also taken for an empty core, which yields "()".
                temps = []
                for batch in batches:
                    temp = "temp_%d" % temp_i
                    temp_i += 1
                    spt.write("define %s %s and p_%s\n" %
                              (temp, batch, name))
                    temps.append(temp)
                spt.write("define %s (%s)\n" % (core_part, ','.join(temps)))
        core = "core%s" % j
        spt.write("define %s %s\n" % (core, ','.join(core_parts)))
        cores.append(core)
    spt.write("define cores %s\n" % ','.join(cores))