Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/raw-rev/32808e442d93
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 08:05:24 2012
Кодировка:

# HG changeset patch
# User Boris Nagaev
# Date 1316349179 -14400
# Node ID 32808e442d93588322cc952d0b3a0d9b14f9efdc
# Parent 53800c7e3458ff38a82ead559c3f4bc903a0f39e
Geometrical core improvement: ignore cores owned by a single SS element

* add argument ignore_one_ss (default: True) to the geometrical_cores() method
* add argument ignore_one_ss (default: True) to the other methods of structure
that call geometrical_cores()
* add command-line option -e (default: False) to blocks3d, geometrical_core and pair_cores
* remove xyz_only=True from pair_cores, since the secondary structure determiner
needs the structures to be loaded

This has not been tested!

close #119

diff -r 53800c7e3458 -r 32808e442d93 allpy/structure.py
--- a/allpy/structure.py Sun Sep 18 15:11:49 2011 +0400
+++ b/allpy/structure.py Sun Sep 18 16:32:59 2011 +0400
@@ -504,7 +504,8 @@

def geometrical_cores(self, max_delta=config.delta,
timeout=config.timeout, minsize=config.minsize,
- ac_new_atoms=config.ac_new_atoms, ac_count=config.ac_count):
+ ac_new_atoms=config.ac_new_atoms, ac_count=config.ac_count,
+ ignore_one_ss=True):
""" Return length-sorted list of GCs
GC is set of columns

@@ -517,6 +518,8 @@
difference = part of new atoms in current core
* ac_count -- max number of cores (including main core)
0 means infinity
+ * ignore_one_ss -- ignore geometrical cores, owned by one secondary
+ structure element in every sequence

weight is calculated as 1 / (delta + 1)
delta in [0, +inf) => weight in (0, 1]
@@ -554,11 +557,19 @@
GCs.append(clique)
if ac_count != 0 and len(GCs) >= ac_count:
break
+ if ignore_one_ss:
+ for s in self.sequences:
+ s.add_markup('ss')
+ def ss_number(sequence, columns):
+ return len(set(c[sequence].ss for c in columns))
+ def gc_is_ok(gc):
+ return any(ss_number(s, gc) > 1 for s in self.sequences)
+ GCs = [gc for gc in GCs if gc_is_ok(gc)]
return GCs

def pair_core_parts(self, max_delta=config.delta,
timeout=config.timeout, min_width=config.min_width,
- min_core_size=config.min_core_size, join=True):
+ min_core_size=config.min_core_size, ignore_one_ss=True, join=True):
""" Return list of continuous parts of gc for each sequence pair

part is block
@@ -574,7 +585,8 @@
block.sequences = [seq1, seq2]
cores = block.geometrical_cores(max_delta=max_delta,
timeout=timeout, minsize=min_core_size,
- ac_new_atoms=0.0, ac_count=0)
+ ac_new_atoms=0.0, ac_count=0,
+ ignore_one_ss=ignore_one_ss)
parts = []
for core in cores:
core_block = copy(block)
@@ -595,7 +607,7 @@

def blocks3d(self, max_delta=config.delta,
timeout=config.timeout, timeout_2=config.timeout_2,
- min_width=config.min_width, primary_cliques=False):
+ min_width=config.min_width, ignore_one_ss=True, primary_cliques=False):
""" Return length-sorted list of reliable blocks

* max_delta -- threshold of distance spreading
@@ -619,7 +631,7 @@
monomer2column[monomer] = column
monomer2sequence[monomer] = sequence
parts = self.pair_core_parts(max_delta=max_delta,
- timeout=timeout, min_width=min_width)
+ timeout=timeout, min_width=min_width, ignore_one_ss=ignore_one_ss)
boundaries = set() # of Columns
for part in parts:
boundaries.add(part.columns[0])
diff -r 53800c7e3458 -r 32808e442d93 blocks3d/blocks3d.py
--- a/blocks3d/blocks3d.py Sun Sep 18 15:11:49 2011 +0400
+++ b/blocks3d/blocks3d.py Sun Sep 18 16:32:59 2011 +0400
@@ -34,6 +34,7 @@
p.add_argument('-o',help='Output text file',metavar='FILE',type=w)
p.add_argument('-H',help='Output html file',metavar='FILE',type=w)
p.add_argument('-d',help='Distance spreading',metavar='float',type=f_nng,default=c.delta)
+p.add_argument('-e',help='Ignore cores, owned by one SS element',type=bool, default=False)
p.add_argument('-m',help='Min block width',metavar='int',type=pos,default=c.min_width)
p.add_argument('-t',help='Bron-Kerbosh (couple cores) timeout (-1 - unlimited)',metavar='int',type=timeout,default=0)
p.add_argument('-T',help='Bron-Kerbosh (blocks) timeout (-1 - unlimited)',metavar='int',type=timeout,default=c.timeout_2)
@@ -57,7 +58,7 @@

blocks = list(block.blocks3d(max_delta=args.d,
timeout=args.t, timeout_2=args.T,
- min_width=args.m))
+ min_width=args.m, ignore_one_ss=args.e))

if args.H:
alignment.blocks_to_html(args.H, blocks, open(html_template).read())
diff -r 53800c7e3458 -r 32808e442d93 geometrical_core/geometrical-core
--- a/geometrical_core/geometrical-core Sun Sep 18 15:11:49 2011 +0400
+++ b/geometrical_core/geometrical-core Sun Sep 18 16:32:59 2011 +0400
@@ -40,6 +40,7 @@
p.add_argument('-s',help='Output spt file',metavar='FILE',type=w)
p.add_argument('-d',help='Distance spreading',
metavar='float',type=f_nng,default=c.delta)
+p.add_argument('-e',help='Ignore cores, owned by one SS element',type=bool, default=False)
p.add_argument('-m',help='Min core size',
metavar='int',type=pos,default=c.minsize)
p.add_argument('-t',help='Bron-Kerbosh algorithm timeout',
@@ -70,7 +71,7 @@
for sequence in block.sequences:
sequence.auto_pdb()
GCs = block.geometrical_cores(max_delta=args.d, timeout=args.t,
- minsize=args.t, ac_new_atoms=args.n, ac_count=args.a)
+ minsize=args.t, ac_new_atoms=args.n, ac_count=args.a, ignore_one_ss=args.e)

column2pos = block.column2pos()

diff -r 53800c7e3458 -r 32808e442d93 pair_cores/pair_cores.py
--- a/pair_cores/pair_cores.py Sun Sep 18 15:11:49 2011 +0400
+++ b/pair_cores/pair_cores.py Sun Sep 18 16:32:59 2011 +0400
@@ -13,7 +13,7 @@
from html import html_template
from allpy.structure import CachedDownloadPdb, cached_download_pdb

-def homology_from_3d(markup_file, homology_file, max_delta, alignment_file=None,
+def homology_from_3d(markup_file, homology_file, max_delta, ignore_one_ss, alignment_file=None,
out_alignment_file=None, out_pair_cores_file=None, out_html_file=None,
pdb_getter=cached_download_pdb):
""" Turn pdb markup into homology_file
@@ -46,9 +46,9 @@
for sequence in copy(alignment.sequences):
try:
if markup_file:
- sequence.markups['pdb_resi'].add_pdb(download_pdb=pdb_getter, xyz_only=True)
+ sequence.markups['pdb_resi'].add_pdb(download_pdb=pdb_getter)
else:
- sequence.auto_pdb(xyz_only=True, pdb_getter=pdb_getter)
+ sequence.auto_pdb(pdb_getter=pdb_getter)
except Exception, e:
raise Exception("Can't load structure for sequence %s from file %s (%s: %s)" % \
(sequence.name, input_file, e.__class__, e))
@@ -74,11 +74,12 @@
p.add_argument('-y',help='Output homology file',metavar='FILE',type=w, required=True)
p.add_argument('-c',help='Pdb cache directory',metavar='DIR',type=str, default='pdb_cache')
p.add_argument('-d',help='Distance spreading',metavar='float',type=float,default=2.0)
+ p.add_argument('-e',help='Ignore cores, owned by one SS element',type=bool, default=False)
p.add_argument('-o',help='Output alignment file',metavar='FILE',type=w)
p.add_argument('-b',help='Output pair_cores file',metavar='FILE',type=w)
p.add_argument('-H',help='Output HTML file',metavar='FILE',type=w)
args = p.parse_args()
- homology_from_3d(markup_file=args.m, homology_file=args.y, max_delta=args.d,
+ homology_from_3d(markup_file=args.m, homology_file=args.y, max_delta=args.d, ignore_one_ss=args.e,
alignment_file=args.i, out_alignment_file=args.o, out_pair_cores_file=args.b,
out_html_file=args.H, pdb_getter=CachedDownloadPdb(cache_dir=args.c))