Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/raw-rev/7477ef0120e1
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 07:46:17 2012
Кодировка:

# HG changeset patch
# User Boris Burkov
# Date 1332507151 -14400
# Node ID 7477ef0120e1846785ffdf8d4f6f3d7673d4f802
# Parent 811914ba719530547078ce92e4ed040fea1863c9# Parent 370678689947f7ee66ac516a2e1022ac8981c929
Automated merge with ssh://kodomo.fbb.msu.ru/allpy

diff -r 811914ba7195 -r 7477ef0120e1 allpy/base.py
--- a/allpy/base.py Fri Mar 23 01:27:40 2012 +0400
+++ b/allpy/base.py Fri Mar 23 16:52:31 2012 +0400
@@ -512,6 +512,7 @@
"""Return hash by identity."""
return id(self)

+
class Block(Alignment):
"""Block of alignment.

diff -r 811914ba7195 -r 7477ef0120e1 sequence_based_blocks_search/blocks_finder.py
--- a/sequence_based_blocks_search/blocks_finder.py Fri Mar 23 01:27:40 2012 +0400
+++ b/sequence_based_blocks_search/blocks_finder.py Fri Mar 23 16:52:31 2012 +0400
@@ -4,10 +4,47 @@
sys.path.append("../")
from allpy import protein
from allpy.homology import *
+from allpy import data
+from allpy import base
import copy
import math
from functional_groups import functional_groups
from allpy.util import *
+from optparse import OptionParser
+
+
+class SbbsProteinMonomer(protein.Monomer):
+ def __eq__(self, other):
+ return self is other
+
+ def __ne__(self, other):
+ return not self is other
+
+SbbsProteinMonomer._initialize(data.codes.protein)
+
+class SbbsProteinAlignment(protein.Alignment):
+ def rows_as_strings(self, gaps='-'):
+ rows = []
+ for sequence in self.sequences:
+ string = ""
+ for column in self.columns:
+ if sequence in column:
+ if hasattr(column[sequence], "input_code1"):
+ string += column[sequence].input_code1
+ else:
+ string += column[sequence].code1
+ else:
+ string += gap
+ string = util.UserString(string)
+ string.sequence = sequence
+ rows.append(string)
+ return rows
+
+class SbbsProteinColumn(base.Column):
+ def __eq__(self, other):
+ return self is other
+
+vars(base)['Column'] = SbbsProteinColumn

class Preblock(object):
"""Attributes:
@@ -51,7 +88,10 @@

def main(file):
- alignment = protein.Alignment().append_file(file)
+ try:
+ alignment = SbbsProteinAlignment().append_file(file)
+ except Exception as E:
+ raise Exception("Failed to read alignment\nCheck alignment format")
groups_content = find_groups_content(alignment, functional_groups)
find_links(groups_content, alignment)
blocks = mark_blocks(alignment)
@@ -88,7 +128,7 @@
addition_to_candidate_preblocks.append(Preblock(index_of_column, index_of_column, copy.copy(groups_content[column][group_name]), functional_groups[group_name]["70%"], functional_groups[group_name]["100%"], 0.0, alignment))
except KeyError as ke:
print "Found non-canonical aminoacid: %s"%(ke)
- #create new preblocks as combinations of the old ones
+ #create new preblocks as combinations of the old ones
try:
for preblock in candidate_preblocks:
preblock_extensions = []
@@ -117,7 +157,7 @@
overlapping_preblocks=[]
for preblock in candidate_preblocks:
if preblock.alignment.columns[preblock.last_column_index] is column and preblock.calculate_score() > calculate_threshold(len(preblock.sequences), len(alignment.sequences), functional_groups):
- create_links(preblock, alignment) #links are not cliques, just connected components
+ create_links(preblock, alignment) #links are not cliques, just connected components
overlapping_preblocks+=accept_and_return_overlapping_preblocks_before(preblock, alignment, candidate_preblocks)
if preblock not in overlapping_preblocks: new_candidate_preblocks.append(preblock)
candidate_preblocks = new_candidate_preblocks
@@ -218,7 +258,7 @@
inconserved positions, create the transition matrix, calculate
its eigenvectors and eigenvalues, round the weights, find coefficient
of [1,0,0,...] decomposition in the basis of eigenvectors.
-
+

"""

@@ -244,13 +284,13 @@
# column[iterated_sequence].links=[sequence]
# else:
# if sequence not in column[iterated_sequence].links: column[iterated_sequence].links.append(sequence)
-
+
if "links" not in column[any_sequence].__dict__:
column[any_sequence].links=[]
remaining_sequences = copy.copy(preblock.sequences)
remaining_sequences.remove(any_sequence)
for sequence in preblock.sequences:#links are bidirectional
- if sequence not in column[any_sequence].links:
+ if sequence not in column[any_sequence].links:
column[any_sequence].links.append(sequence)
if "links" not in column[sequence].__dict__:
column[sequence].links=[any_sequence]
@@ -304,8 +344,8 @@
#print index_of_column, [gblock.columns for gblock in blocks_in_this_column]
if alignment.columns.index(column) == len(alignment.columns)-1: output+=blocks_in_this_column
return output
-
-
+
+
def find_connected_component(sequences, column):
"""output is a list of sequences"""
#find one sequence that has blocks
@@ -326,7 +366,7 @@
if another_sequence not in queue and another_sequence not in output and another_sequence in sequences:
queue.append(another_sequence)
return output
-
+

def delete_links(alignment):
for sequence in alignment.sequences:
@@ -334,7 +374,7 @@
if "links" in monomer.__dict__:
del monomer.links

-#[gblock for gblock in blocks_in_previous_column for gcolumn in gblock.columns if alignment.columns.index(gcolumn)==147]
+#[gblock for gblock in blocks_in_previous_column for gcolumn in gblock.columns if alignment.columns.index(gcolumn)==147]

def create_monomer_homology(alignment):
"""returns MonomerHomology object, given alignment
@@ -380,8 +420,16 @@
sys.exit()
#creating markups
for sequence in alignment.sequences:
- sequence.add_markup('index')
- aim = alignment.add_markup('index')
+ if 'markups' in sequence.__dict__:
+ if 'index' in sequence.markups:
+ pass
+ else:
+ sequence.add_markup('index')
+ if 'markups' in alignment.__dict__:
+ if 'index' in alignment.markups:
+ aim = alignment.markups['index']
+ else:
+ aim = alignment.add_markup('index')
#inferring classes_of_equivalence = homologous monomers = connected_components from links
class_number = 0
for column in alignment.columns:
@@ -408,4 +456,27 @@

if __name__== '__main__':
- main(open(sys.argv[1]))
+ usage = "usage: %prog [options] arg"
+ parser = OptionParser(usage)
+ parser.add_option("-c", "--classes", dest="output_classes", help="Name of optional output file with homology classes")
+ (options, args) = parser.parse_args()
+ input_file = open(args[0])
+ blocks = main(input_file)
+ if blocks == []:
+ sys.exit()
+ aim = blocks[0].alignment.add_markup('index') #create markup
+ file_content=""
+ for index, block in enumerate(blocks):
+ for sequence in block.sequences:
+ file_content+=str(str(args[1]))+"\t"+str(index)+"\t"+str(sequence.name)+"\t"
+ buffer=""
+ for column in block.columns:
+ buffer+=str(aim[column])+" "
+ if (len(buffer)!=0): buffer=buffer[:-1]
+ file_content+=buffer+"\n"
+
+ output_file = open(args[1], 'w')
+ output_file.write(file_content)
+
+ if options.output_classes:
+ create_file_with_monomer_homology(blocks[0].alignment, options.output_classes)