Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/32808e442d93
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 00:57:22 2012
Кодировка:
allpy: 32808e442d93

allpy

changeset 889:32808e442d93

Geometrical core improvement: ignore cores owned by one SS element * add argument ignore_one_ss to geometrical_cores() method (=True) * add argument ignore_one_ss to other methods of structure that call geometrical_cores() (=True) * add command line option to blocks3d, geometrical_core, pair_cores (=False) * remove xyz_only=True from pair_cores, since the secondary structure determiner needs structures to be loaded. This has not been tested! close #119
author Boris Nagaev <bnagaev@gmail.com>
date Sun, 18 Sep 2011 16:32:59 +0400
parents 53800c7e3458
children e7760fc574b5
files allpy/structure.py blocks3d/blocks3d.py geometrical_core/geometrical-core pair_cores/pair_cores.py
diffstat 4 files changed, 26 insertions(+), 11 deletions(-) [+]
line diff
     1.1 --- a/allpy/structure.py	Sun Sep 18 15:11:49 2011 +0400
     1.2 +++ b/allpy/structure.py	Sun Sep 18 16:32:59 2011 +0400
     1.3 @@ -504,7 +504,8 @@
     1.4  
     1.5      def geometrical_cores(self, max_delta=config.delta,
     1.6      timeout=config.timeout, minsize=config.minsize,
     1.7 -    ac_new_atoms=config.ac_new_atoms, ac_count=config.ac_count):
     1.8 +    ac_new_atoms=config.ac_new_atoms, ac_count=config.ac_count,
     1.9 +    ignore_one_ss=True):
    1.10          """ Return length-sorted list of GCs
    1.11          GC is set of columns
    1.12  
    1.13 @@ -517,6 +518,8 @@
    1.14              difference = part of new atoms in current core
    1.15          * ac_count -- max number of cores (including main core)
    1.16              0 means infinity
    1.17 +        * ignore_one_ss -- ignore geometrical cores, owned by one secondary
    1.18 +            structure element in every sequence
    1.19  
    1.20          weight is calculated as 1 / (delta + 1)
    1.21              delta in [0, +inf) => weight in (0, 1]
    1.22 @@ -554,11 +557,19 @@
    1.23                  GCs.append(clique)
    1.24                  if ac_count != 0 and len(GCs) >= ac_count:
    1.25                      break
    1.26 +        if ignore_one_ss:
    1.27 +            for s in self.sequences:
    1.28 +                s.add_markup('ss')
    1.29 +            def ss_number(sequence, columns):
    1.30 +                return len(set(c[sequence].ss for c in columns))
    1.31 +            def gc_is_ok(gc):
    1.32 +                return any(ss_number(s, gc) > 1 for s in self.sequences)
    1.33 +            GCs = [gc for gc in GCs if gc_is_ok(gc)]
    1.34          return GCs
    1.35  
    1.36      def pair_core_parts(self, max_delta=config.delta,
    1.37      timeout=config.timeout, min_width=config.min_width,
    1.38 -    min_core_size=config.min_core_size, join=True):
    1.39 +    min_core_size=config.min_core_size, ignore_one_ss=True, join=True):
    1.40          """ Return list of continuous parts of gc for each sequence pair
    1.41  
    1.42          part is block
    1.43 @@ -574,7 +585,8 @@
    1.44                      block.sequences = [seq1, seq2]
    1.45                      cores = block.geometrical_cores(max_delta=max_delta,
    1.46                          timeout=timeout, minsize=min_core_size,
    1.47 -                        ac_new_atoms=0.0, ac_count=0)
    1.48 +                        ac_new_atoms=0.0, ac_count=0,
    1.49 +                        ignore_one_ss=ignore_one_ss)
    1.50                      parts = []
    1.51                      for core in cores:
    1.52                          core_block = copy(block)
    1.53 @@ -595,7 +607,7 @@
    1.54  
    1.55      def blocks3d(self, max_delta=config.delta,
    1.56      timeout=config.timeout, timeout_2=config.timeout_2,
    1.57 -    min_width=config.min_width, primary_cliques=False):
    1.58 +    min_width=config.min_width, ignore_one_ss=True, primary_cliques=False):
    1.59          """ Return length-sorted list of reliable blocks
    1.60  
    1.61          * max_delta -- threshold of distance spreading
    1.62 @@ -619,7 +631,7 @@
    1.63                  monomer2column[monomer] = column
    1.64                  monomer2sequence[monomer] = sequence
    1.65          parts = self.pair_core_parts(max_delta=max_delta,
    1.66 -            timeout=timeout, min_width=min_width)
    1.67 +            timeout=timeout, min_width=min_width, ignore_one_ss=ignore_one_ss)
    1.68          boundaries = set() # of Columns
    1.69          for part in parts:
    1.70              boundaries.add(part.columns[0])
     2.1 --- a/blocks3d/blocks3d.py	Sun Sep 18 15:11:49 2011 +0400
     2.2 +++ b/blocks3d/blocks3d.py	Sun Sep 18 16:32:59 2011 +0400
     2.3 @@ -34,6 +34,7 @@
     2.4  p.add_argument('-o',help='Output text file',metavar='FILE',type=w)
     2.5  p.add_argument('-H',help='Output html file',metavar='FILE',type=w)
     2.6  p.add_argument('-d',help='Distance spreading',metavar='float',type=f_nng,default=c.delta)
     2.7 +p.add_argument('-e',help='Ignore cores, owned by one SS element',type=bool, default=False)
     2.8  p.add_argument('-m',help='Min block width',metavar='int',type=pos,default=c.min_width)
     2.9  p.add_argument('-t',help='Bron-Kerbosh (couple cores) timeout (-1 - unlimited)',metavar='int',type=timeout,default=0)
    2.10  p.add_argument('-T',help='Bron-Kerbosh (blocks) timeout (-1 - unlimited)',metavar='int',type=timeout,default=c.timeout_2)
    2.11 @@ -57,7 +58,7 @@
    2.12  
    2.13  blocks = list(block.blocks3d(max_delta=args.d,
    2.14      timeout=args.t, timeout_2=args.T,
    2.15 -    min_width=args.m))
    2.16 +    min_width=args.m, ignore_one_ss=args.e))
    2.17  
    2.18  if args.H:
    2.19      alignment.blocks_to_html(args.H, blocks, open(html_template).read())
     3.1 --- a/geometrical_core/geometrical-core	Sun Sep 18 15:11:49 2011 +0400
     3.2 +++ b/geometrical_core/geometrical-core	Sun Sep 18 16:32:59 2011 +0400
     3.3 @@ -40,6 +40,7 @@
     3.4  p.add_argument('-s',help='Output spt file',metavar='FILE',type=w)
     3.5  p.add_argument('-d',help='Distance spreading',
     3.6      metavar='float',type=f_nng,default=c.delta)
     3.7 +p.add_argument('-e',help='Ignore cores, owned by one SS element',type=bool, default=False)
     3.8  p.add_argument('-m',help='Min core size',
     3.9      metavar='int',type=pos,default=c.minsize)
    3.10  p.add_argument('-t',help='Bron-Kerbosh algorithm timeout',
    3.11 @@ -70,7 +71,7 @@
    3.12  for sequence in block.sequences:
    3.13      sequence.auto_pdb()
    3.14  GCs = block.geometrical_cores(max_delta=args.d, timeout=args.t,
    3.15 -    minsize=args.t, ac_new_atoms=args.n, ac_count=args.a)
    3.16 +    minsize=args.t, ac_new_atoms=args.n, ac_count=args.a, ignore_one_ss=args.e)
    3.17  
    3.18  column2pos = block.column2pos()
    3.19  
     4.1 --- a/pair_cores/pair_cores.py	Sun Sep 18 15:11:49 2011 +0400
     4.2 +++ b/pair_cores/pair_cores.py	Sun Sep 18 16:32:59 2011 +0400
     4.3 @@ -13,7 +13,7 @@
     4.4  from html import html_template
     4.5  from allpy.structure import CachedDownloadPdb, cached_download_pdb
     4.6  
     4.7 -def homology_from_3d(markup_file, homology_file, max_delta, alignment_file=None,
     4.8 +def homology_from_3d(markup_file, homology_file, max_delta, ignore_one_ss, alignment_file=None,
     4.9      out_alignment_file=None, out_pair_cores_file=None, out_html_file=None,
    4.10      pdb_getter=cached_download_pdb):
    4.11      """ Turn pdb markup into homology_file
    4.12 @@ -46,9 +46,9 @@
    4.13      for sequence in copy(alignment.sequences):
    4.14          try:
    4.15              if markup_file:
    4.16 -                sequence.markups['pdb_resi'].add_pdb(download_pdb=pdb_getter, xyz_only=True)
    4.17 +                sequence.markups['pdb_resi'].add_pdb(download_pdb=pdb_getter)
    4.18              else:
    4.19 -                sequence.auto_pdb(xyz_only=True, pdb_getter=pdb_getter)
    4.20 +                sequence.auto_pdb(pdb_getter=pdb_getter)
    4.21          except Exception, e:
    4.22              raise Exception("Can't load structure for sequence %s from file %s (%s: %s)" % \
    4.23                  (sequence.name, input_file, e.__class__, e))
    4.24 @@ -74,11 +74,12 @@
    4.25      p.add_argument('-y',help='Output homology file',metavar='FILE',type=w, required=True)
    4.26      p.add_argument('-c',help='Pdb cache directory',metavar='DIR',type=str, default='pdb_cache')
    4.27      p.add_argument('-d',help='Distance spreading',metavar='float',type=float,default=2.0)
    4.28 +    p.add_argument('-e',help='Ignore cores, owned by one SS element',type=bool, default=False)
    4.29      p.add_argument('-o',help='Output alignment file',metavar='FILE',type=w)
    4.30      p.add_argument('-b',help='Output pair_cores file',metavar='FILE',type=w)
    4.31      p.add_argument('-H',help='Output HTML file',metavar='FILE',type=w)
    4.32      args = p.parse_args()
    4.33 -    homology_from_3d(markup_file=args.m, homology_file=args.y, max_delta=args.d,
    4.34 +    homology_from_3d(markup_file=args.m, homology_file=args.y, max_delta=args.d, ignore_one_ss=args.e,
    4.35          alignment_file=args.i, out_alignment_file=args.o, out_pair_cores_file=args.b,
    4.36          out_html_file=args.H, pdb_getter=CachedDownloadPdb(cache_dir=args.c))
    4.37