allpy
changeset 730:3c09dabe0e96
create_file_with_monomer_homology once tested
author | Boris Burkov <BurkovBA@gmail.com> |
---|---|
date | Fri, 08 Jul 2011 20:42:57 +0400 |
parents | dc3e9ff89ab5 |
children | 257afd2bd903 |
files | sequence_based_blocks_search/blocks_finder.py |
diffstat | 1 files changed, 39 insertions(+), 0 deletions(-) [+] |
line diff
# Reconstructed from the collapsed diff hunk "@@ -371,5 +371,44 @@" of
# sequence_based_blocks_search/blocks_finder.py.
#
# The hunk's leading context lines are the tail of a function whose start is
# not part of the hunk; they are kept verbatim here as a comment:
#     monomer_homology.classes[class_number].append((sequence.name, column[sequence].index+1))
#     monomer_homology.monomer_ids[(sequence.name, column[sequence].index+1)] = class_number


def create_file_with_monomer_homology(alignment, markup_file_name):
    """Write the classes of homologous monomers of an alignment to a file.

    The alignment is expected to carry the links created by the
    create_links function.  Within every column the sequences are split
    into connected components (as reported by find_connected_component);
    each component becomes one homology class, and every sequence left
    over once no further component is reported gets a class of its own.
    Class numbers start at 0 and keep increasing across columns.

    Args:
        alignment: alignment object providing ``sequences`` and ``columns``.
        markup_file_name: path of the output file, opened in ``'w'`` mode.

    Raises:
        SystemExit: if the output file cannot be opened (the error is
            printed before exiting).
    """
    try:
        markup_file = open(markup_file_name, 'w')
    except IOError as E:
        # Name the exception class explicitly: catching an undefined name
        # would itself fail with NameError instead of reporting the problem.
        print(E)
        # Non-zero status so that callers/shells can detect the failure.
        sys.exit(1)

    # The context manager guarantees the file is flushed and closed, also
    # when one of the helpers below raises.
    with markup_file:
        # creating markups
        for sequence in alignment.sequences:
            # Built and refreshed only for its effect on the sequence.
            # NOTE(review): presumably this sets the per-monomer ``index``
            # read in subset_of_sequences_in_column2file -- confirm.
            sim = SequenceIndexMarkup(sequence)
            sim.refresh()
        aim = AlignmentIndexMarkup(alignment)

        # inferring classes_of_equivalence = homologous monomers =
        # connected_components from links
        class_number = 0
        for column in alignment.columns:
            # Private list of the column's sequences; list() works whether
            # keys() returns a list or a view.
            remaining_sequences = list(column.keys())
            while remaining_sequences:
                connected_component = find_connected_component(remaining_sequences, column)
                if not connected_component:
                    # create class for each of the remaining monomers
                    for sequence in remaining_sequences:
                        subset_of_sequences_in_column2file(markup_file, class_number, [sequence], column, aim)
                        class_number += 1
                    # Empty the list only after the loop: removing items
                    # while iterating would skip every other sequence.
                    remaining_sequences = []
                else:
                    subset_of_sequences_in_column2file(markup_file, class_number, connected_component, column, aim)
                    class_number += 1
                    for sequence in connected_component:
                        remaining_sequences.remove(sequence)


def subset_of_sequences_in_column2file(file, class_number, sequences, column, alignment_index_markup):
    """Write one record per given sequence, all with the same class number.

    For every sequence MonomerHomology.write_monomer receives the file, the
    pair (sequence name, ``column[sequence].index + 1``), the class number
    and ``alignment_index_markup[column] + 1``.
    NOTE(review): the ``+ 1`` offsets presumably convert 0-based indices to
    1-based numbering -- confirm against the markup classes.

    Args:
        file: writable file object handed on to write_monomer.
        class_number: number of the homology class being written.
        sequences: sequences of the column that belong to this class.
        column: alignment column, indexable by sequence.
        alignment_index_markup: mapping from column to its index.
    """
    for sequence in sequences:
        MonomerHomology.write_monomer(
            file,
            (sequence.name, column[sequence].index + 1),
            class_number,
            alignment_index_markup[column] + 1,
        )


if __name__ == '__main__':
    main(open(sys.argv[1]))