allpy
changeset 889:32808e442d93
Geometrical core improvement: ignore cores owned by a single SS element
* add argument ignore_one_ss to the geometrical_cores() method (default: True)
* add argument ignore_one_ss to the other methods of structure that
call geometrical_cores() (default: True)
* add command line option -e to blocks3d, geometrical_core, pair_cores (default: False)
* remove xyz_only=True from pair_cores, since the secondary structure determiner
needs the structures to be loaded
This has not been tested!
close #119
author | Boris Nagaev <bnagaev@gmail.com> |
---|---|
date | Sun, 18 Sep 2011 16:32:59 +0400 |
parents | 53800c7e3458 |
children | e7760fc574b5 |
files | allpy/structure.py blocks3d/blocks3d.py geometrical_core/geometrical-core pair_cores/pair_cores.py |
diffstat | 4 files changed, 26 insertions(+), 11 deletions(-) [+] |
line diff
1.1 --- a/allpy/structure.py Sun Sep 18 15:11:49 2011 +0400 1.2 +++ b/allpy/structure.py Sun Sep 18 16:32:59 2011 +0400 1.3 @@ -504,7 +504,8 @@ 1.4 1.5 def geometrical_cores(self, max_delta=config.delta, 1.6 timeout=config.timeout, minsize=config.minsize, 1.7 - ac_new_atoms=config.ac_new_atoms, ac_count=config.ac_count): 1.8 + ac_new_atoms=config.ac_new_atoms, ac_count=config.ac_count, 1.9 + ignore_one_ss=True): 1.10 """ Return length-sorted list of GCs 1.11 GC is set of columns 1.12 1.13 @@ -517,6 +518,8 @@ 1.14 difference = part of new atoms in current core 1.15 * ac_count -- max number of cores (including main core) 1.16 0 means infinity 1.17 + * ignore_one_ss -- ignore geometrical cores, owned by one secondary 1.18 + structure element in every sequence 1.19 1.20 weight is calculated as 1 / (delta + 1) 1.21 delta in [0, +inf) => weight in (0, 1] 1.22 @@ -554,11 +557,19 @@ 1.23 GCs.append(clique) 1.24 if ac_count != 0 and len(GCs) >= ac_count: 1.25 break 1.26 + if ignore_one_ss: 1.27 + for s in self.sequences: 1.28 + s.add_markup('ss') 1.29 + def ss_number(sequence, columns): 1.30 + return len(set(c[sequence].ss for c in columns)) 1.31 + def gc_is_ok(gc): 1.32 + return any(ss_number(s, gc) > 1 for s in self.sequences) 1.33 + GCs = [gc for gc in GCs if gc_is_ok(gc)] 1.34 return GCs 1.35 1.36 def pair_core_parts(self, max_delta=config.delta, 1.37 timeout=config.timeout, min_width=config.min_width, 1.38 - min_core_size=config.min_core_size, join=True): 1.39 + min_core_size=config.min_core_size, ignore_one_ss=True, join=True): 1.40 """ Return list of continuous parts of gc for each sequence pair 1.41 1.42 part is block 1.43 @@ -574,7 +585,8 @@ 1.44 block.sequences = [seq1, seq2] 1.45 cores = block.geometrical_cores(max_delta=max_delta, 1.46 timeout=timeout, minsize=min_core_size, 1.47 - ac_new_atoms=0.0, ac_count=0) 1.48 + ac_new_atoms=0.0, ac_count=0, 1.49 + ignore_one_ss=ignore_one_ss) 1.50 parts = [] 1.51 for core in cores: 1.52 core_block = copy(block) 1.53 @@ 
-595,7 +607,7 @@ 1.54 1.55 def blocks3d(self, max_delta=config.delta, 1.56 timeout=config.timeout, timeout_2=config.timeout_2, 1.57 - min_width=config.min_width, primary_cliques=False): 1.58 + min_width=config.min_width, ignore_one_ss=True, primary_cliques=False): 1.59 """ Return length-sorted list of reliable blocks 1.60 1.61 * max_delta -- threshold of distance spreading 1.62 @@ -619,7 +631,7 @@ 1.63 monomer2column[monomer] = column 1.64 monomer2sequence[monomer] = sequence 1.65 parts = self.pair_core_parts(max_delta=max_delta, 1.66 - timeout=timeout, min_width=min_width) 1.67 + timeout=timeout, min_width=min_width, ignore_one_ss=ignore_one_ss) 1.68 boundaries = set() # of Columns 1.69 for part in parts: 1.70 boundaries.add(part.columns[0])
2.1 --- a/blocks3d/blocks3d.py Sun Sep 18 15:11:49 2011 +0400 2.2 +++ b/blocks3d/blocks3d.py Sun Sep 18 16:32:59 2011 +0400 2.3 @@ -34,6 +34,7 @@ 2.4 p.add_argument('-o',help='Output text file',metavar='FILE',type=w) 2.5 p.add_argument('-H',help='Output html file',metavar='FILE',type=w) 2.6 p.add_argument('-d',help='Distance spreading',metavar='float',type=f_nng,default=c.delta) 2.7 +p.add_argument('-e',help='Ignore cores, owned by one SS element',type=bool, default=False) 2.8 p.add_argument('-m',help='Min block width',metavar='int',type=pos,default=c.min_width) 2.9 p.add_argument('-t',help='Bron-Kerbosh (couple cores) timeout (-1 - unlimited)',metavar='int',type=timeout,default=0) 2.10 p.add_argument('-T',help='Bron-Kerbosh (blocks) timeout (-1 - unlimited)',metavar='int',type=timeout,default=c.timeout_2) 2.11 @@ -57,7 +58,7 @@ 2.12 2.13 blocks = list(block.blocks3d(max_delta=args.d, 2.14 timeout=args.t, timeout_2=args.T, 2.15 - min_width=args.m)) 2.16 + min_width=args.m, ignore_one_ss=args.e)) 2.17 2.18 if args.H: 2.19 alignment.blocks_to_html(args.H, blocks, open(html_template).read())
3.1 --- a/geometrical_core/geometrical-core Sun Sep 18 15:11:49 2011 +0400 3.2 +++ b/geometrical_core/geometrical-core Sun Sep 18 16:32:59 2011 +0400 3.3 @@ -40,6 +40,7 @@ 3.4 p.add_argument('-s',help='Output spt file',metavar='FILE',type=w) 3.5 p.add_argument('-d',help='Distance spreading', 3.6 metavar='float',type=f_nng,default=c.delta) 3.7 +p.add_argument('-e',help='Ignore cores, owned by one SS element',type=bool, default=False) 3.8 p.add_argument('-m',help='Min core size', 3.9 metavar='int',type=pos,default=c.minsize) 3.10 p.add_argument('-t',help='Bron-Kerbosh algorithm timeout', 3.11 @@ -70,7 +71,7 @@ 3.12 for sequence in block.sequences: 3.13 sequence.auto_pdb() 3.14 GCs = block.geometrical_cores(max_delta=args.d, timeout=args.t, 3.15 - minsize=args.t, ac_new_atoms=args.n, ac_count=args.a) 3.16 + minsize=args.t, ac_new_atoms=args.n, ac_count=args.a, ignore_one_ss=args.e) 3.17 3.18 column2pos = block.column2pos() 3.19
4.1 --- a/pair_cores/pair_cores.py Sun Sep 18 15:11:49 2011 +0400 4.2 +++ b/pair_cores/pair_cores.py Sun Sep 18 16:32:59 2011 +0400 4.3 @@ -13,7 +13,7 @@ 4.4 from html import html_template 4.5 from allpy.structure import CachedDownloadPdb, cached_download_pdb 4.6 4.7 -def homology_from_3d(markup_file, homology_file, max_delta, alignment_file=None, 4.8 +def homology_from_3d(markup_file, homology_file, max_delta, ignore_one_ss, alignment_file=None, 4.9 out_alignment_file=None, out_pair_cores_file=None, out_html_file=None, 4.10 pdb_getter=cached_download_pdb): 4.11 """ Turn pdb markup into homology_file 4.12 @@ -46,9 +46,9 @@ 4.13 for sequence in copy(alignment.sequences): 4.14 try: 4.15 if markup_file: 4.16 - sequence.markups['pdb_resi'].add_pdb(download_pdb=pdb_getter, xyz_only=True) 4.17 + sequence.markups['pdb_resi'].add_pdb(download_pdb=pdb_getter) 4.18 else: 4.19 - sequence.auto_pdb(xyz_only=True, pdb_getter=pdb_getter) 4.20 + sequence.auto_pdb(pdb_getter=pdb_getter) 4.21 except Exception, e: 4.22 raise Exception("Can't load structure for sequence %s from file %s (%s: %s)" % \ 4.23 (sequence.name, input_file, e.__class__, e)) 4.24 @@ -74,11 +74,12 @@ 4.25 p.add_argument('-y',help='Output homology file',metavar='FILE',type=w, required=True) 4.26 p.add_argument('-c',help='Pdb cache directory',metavar='DIR',type=str, default='pdb_cache') 4.27 p.add_argument('-d',help='Distance spreading',metavar='float',type=float,default=2.0) 4.28 + p.add_argument('-e',help='Ignore cores, owned by one SS element',type=bool, default=False) 4.29 p.add_argument('-o',help='Output alignment file',metavar='FILE',type=w) 4.30 p.add_argument('-b',help='Output pair_cores file',metavar='FILE',type=w) 4.31 p.add_argument('-H',help='Output HTML file',metavar='FILE',type=w) 4.32 args = p.parse_args() 4.33 - homology_from_3d(markup_file=args.m, homology_file=args.y, max_delta=args.d, 4.34 + homology_from_3d(markup_file=args.m, homology_file=args.y, max_delta=args.d, ignore_one_ss=args.e, 4.35 
alignment_file=args.i, out_alignment_file=args.o, out_pair_cores_file=args.b, 4.36 out_html_file=args.H, pdb_getter=CachedDownloadPdb(cache_dir=args.c)) 4.37