allpy
changeset 270:4e6e85851133
Code cleanup: removed trailing spaces everywhere in the code
author | Daniil Alexeyevsky <me.dendik@gmail.com> |
---|---|
date | Wed, 15 Dec 2010 18:30:19 +0300 |
parents | cf24b0082aa0 |
children | e7c8dd407040 |
files | allpy/base.py allpy/data/__init__.py allpy/fasta.py allpy/graph.py allpy/pdb.py blocks3d/AlignmentSeq.py blocks3d/Blocks3D.py blocks3d/Blocks3D_class.py blocks3d/GeometricalCore_class.py blocks3d/Kliki.py blocks3d/clon.py pytale/dummy_pytale.py repeats/repeat_joiner.py repeats/repeats.py repeats/test.py sandbox/bufferedcanvas.py sandbox/gtk-text.py sandbox/tk-text.py sandbox/ttk.py sandbox/wx-textctrl.py sec_str/sec_str.py |
diffstat | 21 files changed, 891 insertions(+), 891 deletions(-) |
line diff
--- a/allpy/base.py Wed Dec 15 02:22:38 2010 +0300
+++ b/allpy/base.py Wed Dec 15 18:30:19 2010 +0300
@@ -130,18 +130,18 @@
     """ Sequence of Monomers

     list of monomer objects (aminoacids or nucleotides)
-    
+
     Mandatory data:

     * name -- str with the name of sequence
     * description -- str with description of the sequence
-    
+
     Optional (may be empty):

     * source -- source of sequence
     * pdb_chain -- Bio.PDB.Chain
     * pdb_file -- file object
-    
+
     * pdb_residues -- {Monomer: Bio.PDB.Residue}
     * pdb_secstr -- {Monomer: 'Secondary structure'}
     Code Secondary structure

The remaining hunks in allpy/base.py follow the same pattern: trailing spaces are stripped from blank lines, docstrings and continuation lines throughout the Sequence, Alignment and Block classes (set_pdb_chain, pdb_unload, from_str, from_pdb_chain, pdb_auto_add, pdb_save, pdb_add_sec_str, from_fasta, from_sequences, save_fasta, muscle_align, column, secstr, geometrical_cores, xstring, superimpose and related methods). No code changes.
--- a/allpy/data/__init__.py Wed Dec 15 02:22:38 2010 +0300
+++ b/allpy/data/__init__.py Wed Dec 15 18:30:19 2010 +0300
@@ -1,3 +1,3 @@
-""" 
+"""
 Module that contains various data relevant to biological sequences.
 """
--- a/allpy/fasta.py Wed Dec 15 02:22:38 2010 +0300
+++ b/allpy/fasta.py Wed Dec 15 18:30:19 2010 +0300
@@ -1,10 +1,10 @@
 def save_fasta(out_file, string, name, description='', long_line=70):
-    """ Saves given string to out_file in fasta_format 
-    
+    """ Saves given string to out_file in fasta_format
+
     Splits long lines to substrings of length=long_line
-    To prevent this, set long_line=None 
+    To prevent this, set long_line=None
     """
-    out_file.write(">%(name)s %(description)s \n" % {'name':name, 'description':description}) 
+    out_file.write(">%(name)s %(description)s \n" % {'name':name, 'description':description})
     if long_line:
         for i in range(0, len(string) // long_line + 1):
             out_file.write("%s \n" % string[i*long_line : i*long_line + long_line])
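A quick reference call of this helper as it reads after the cleanup (the output file, sequence and names are invented for illustration):

```python
# Illustrative call of save_fasta; the sequence, name and output file are invented.
from allpy.fasta import save_fasta

out = open("example.fasta", "w")
# Writes a ">sp0001 demo sequence" header line, then the string wrapped at 60 characters.
save_fasta(out, "MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ", "sp0001",
           description="demo sequence", long_line=60)
out.close()
```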
--- a/allpy/graph.py Wed Dec 15 02:22:38 2010 +0300
+++ b/allpy/graph.py Wed Dec 15 18:30:19 2010 +0300

Whitespace-only hunks: trailing spaces are stripped from the coding cookie, blank lines, docstrings and continuation lines in the Graph class (line, bounded, count_one, cost_one, count_all, drop_node, add_node, drop_nodes, drop_if_count, bron_kerbosh, fast_cliques, cliques). No code changes.
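The Graph class docstring touched here carries a doctest; restated as a plain snippet with the same graph values:

```python
# Restates the doctest from the Graph class docstring in allpy/graph.py.
from allpy.graph import Graph

g = Graph(set([1, 2, 3]), {frozenset([1, 2]): 1})

# cliques() tries the exact Bron-Kerbosch search first and falls back to the
# fast heuristic on timeout; every clique is returned as a frozenset of nodes.
print g.cliques()
```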
--- a/allpy/pdb.py Wed Dec 15 02:22:38 2010 +0300
+++ b/allpy/pdb.py Wed Dec 15 18:30:19 2010 +0300

Whitespace-only hunks: trailing spaces are stripped around std_id, pdb_id_parse, get_structure, the commented-out std_id_parse block and the blank line at the end of the file. No code changes.
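Judging from these signatures and their use in allpy/base.py, the identifier helpers are used roughly as follows (the identifier "1jcc_A" is a made-up example):

```python
# Rough sketch of the ID helpers in allpy/pdb.py; "1jcc_A" is a made-up identifier.
from allpy.pdb import pdb_id_parse, std_id

match = pdb_id_parse("1jcc_A")
if match:
    # pdb_id_parse() returns a dict with 'code', 'chain' and 'model';
    # 'model' defaults to 0 when the identifier does not carry one.
    print std_id(match['code'], match['chain'], match['model'])  # expected: "1jcc_A"
```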
--- a/blocks3d/AlignmentSeq.py Wed Dec 15 02:22:38 2010 +0300
+++ b/blocks3d/AlignmentSeq.py Wed Dec 15 18:30:19 2010 +0300

Whitespace-only hunks: trailing spaces are stripped (and runs of blank comment lines tidied) in the AlignmentSeq class -- its mojibake Russian docstrings, the cost, gap_cost, unite and lining methods, and the large commented-out correct/recount blocks. No code changes.
--- a/blocks3d/Blocks3D.py Wed Dec 15 02:22:38 2010 +0300
+++ b/blocks3d/Blocks3D.py Wed Dec 15 18:30:19 2010 +0300

Whitespace-only hunks: trailing spaces are stripped throughout the command-line driver (html template lookup, option handling, fasta and conformity-file parsing, the find_blocks() call with its parameters, ID remapping and the HTML report generation). No code changes.
--- a/blocks3d/Blocks3D_class.py Wed Dec 15 02:22:38 2010 +0300
+++ b/blocks3d/Blocks3D_class.py Wed Dec 15 18:30:19 2010 +0300

Whitespace-only hunks: trailing spaces are stripped from blank lines, docstrings, comments and commented-out code in the class defined here (__init__, find_blocks, rectangles_compare, rectangle_filter, splice_to_rect, splice_to_continued and the disabled splice_to_outrect). No code changes.
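The find_blocks() docstring spells out its input and output: a list of entries like ["1jcc", "A", "SSNAKIDQLSSDAQ", None] (pdb_code, chain, aligned block sequence, model or None) in, and a list of dicts with 'start', 'end' and 'IDs' out. A sketch of a call, following the driver in blocks3d/Blocks3D.py; the class name in the import and the second alignment entry are assumptions, since the class statement itself is not part of this diff:

```python
# Sketch of a find_blocks() call; the imported class name is assumed and the
# second alignment entry is invented.
from Blocks3D_class import Blocks3D  # assumed name of the class defined in this file

b3d = Blocks3D()
block = [
    ["1jcc", "A", "SSNAKIDQLSSDAQ", None],   # pdb_code, chain, aligned block sequence, model
    ["2xyz", "B", "SSNAKIDQ--SDAQ", None],
]

for rect in b3d.find_blocks(block):
    # Each result is a dict with 'start' and 'end' positions and the covered 'IDs'.
    print rect['start'], rect['end'], rect['IDs']
```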
--- a/blocks3d/GeometricalCore_class.py Wed Dec 15 02:22:38 2010 +0300
+++ b/blocks3d/GeometricalCore_class.py Wed Dec 15 18:30:19 2010 +0300

Whitespace-only hunks: trailing spaces are stripped from blank lines, docstrings and comments in the geometrical-core class (imports, __init__, the structure/distance caches, to_ID, from_ID, check_block and the PDB download and pdb/ caching code). The listing breaks off within this file; the diffs for the remaining files named in the changeset header are not shown.
9.459 pdbfile_to_save = open('pdb/' + pdb_code + '.ent', 'w') 9.460 pdbfile_to_save.write(''.join(self.pdb_text[pdb_code])) 9.461 pdbfile_to_save.close() 9.462 - 9.463 + 9.464 print 'PDB structure saved to file pdb/' + pdb_code + '.ent' 9.465 - 9.466 + 9.467 except: 9.468 - 9.469 + 9.470 print 'Can not write file pdb/' + pdb_code + '.ent' 9.471 - 9.472 + 9.473 9.474 if not self.pdb_text[pdb_code]: 9.475 #print self.pdb_url.replace('XXXX', pdb_code) 9.476 @@ -532,43 +532,43 @@ 9.477 # We have all PDB texts 9.478 9.479 9.480 - # read all texts and get CA atoms coordinates 9.481 + # read all texts and get CA atoms coordinates 9.482 9.483 for pdb_code, pdb_chain, model in pdb_list: 9.484 - 9.485 + 9.486 ID = self.to_ID(pdb_code, pdb_chain, model) 9.487 - 9.488 + 9.489 S = self.read_pdb(pdb_code, pdb_chain, self.pdb_text[pdb_code], model) 9.490 self.structure[ID], self.seq[ID] = S 9.491 9.492 9.493 if not self.seq[ID]: 9.494 print 'Can not find sequence for "' + ID + '"' 9.495 - return 9.496 - 9.497 + return 9.498 9.499 9.500 9.501 - 9.502 + 9.503 + 9.504 9.505 def read_pdb(self, pdb_code, pdb_chain, pdb_text, model): 9.506 """ 9.507 Reads pdb_text 9.508 - 9.509 + 9.510 Returns (structure, sequence) 9.511 structure (key - string like "1jcc:A") 9.512 This element is dict with [x,y,z] coordinates of each aa 9.513 sequence 9.514 """ 9.515 9.516 - 9.517 + 9.518 9.519 structure = [] # dict with coordinates list 9.520 seq = '' # sequense (string) 9.521 - 9.522 - 9.523 + 9.524 + 9.525 Model_already = 0 # is it our model now 9.526 - 9.527 + 9.528 if not model: 9.529 Model_already = 1 9.530 9.531 @@ -577,52 +577,52 @@ 9.532 9.533 9.534 for currentline in pdb_text: 9.535 - 9.536 - 9.537 + 9.538 + 9.539 if not Model_already: 9.540 if currentline[0:5] == 'MODEL': 9.541 if currentline[10:14].strip() == model: 9.542 Model_already = 1 9.543 continue 9.544 - 9.545 + 9.546 continue 9.547 - 9.548 - 9.549 - 9.550 + 9.551 + 9.552 + 9.553 if currentline[0:6] == 'ENDMDL': 9.554 break 9.555 9.556 9.557 if len(currentline) < 54: 9.558 continue 9.559 - 9.560 - 9.561 + 9.562 + 9.563 9.564 if currentline[0:4] != "ATOM": 9.565 continue 9.566 - 9.567 - 9.568 + 9.569 + 9.570 atomType = currentline[12:16].strip() 9.571 9.572 if atomType != "CA": 9.573 continue 9.574 - 9.575 - 9.576 + 9.577 + 9.578 thisChain = currentline[21].strip() 9.579 9.580 if thisChain != pdb_chain: 9.581 continue 9.582 - 9.583 - 9.584 + 9.585 + 9.586 alterCode = currentline[16] # Alter code 9.587 - 9.588 + 9.589 if alterCode != ' ' and alterCode != 'A': 9.590 continue 9.591 9.592 - 9.593 + 9.594 thisAminoAcid = currentline[17:20].strip() 9.595 9.596 - 9.597 + 9.598 thisX = float(currentline[30:38].strip()) 9.599 thisY = float(currentline[38:46].strip()) 9.600 thisZ = float(currentline[46:54].strip()) 9.601 @@ -637,7 +637,7 @@ 9.602 seq = seq + AAdict[thisAminoAcid][0] 9.603 else: 9.604 seq = seq + 'x' 9.605 - 9.606 + 9.607 return (structure, seq) 9.608 9.609 9.610 @@ -645,35 +645,34 @@ 9.611 9.612 9.613 9.614 -### 9.615 -### 9.616 -### 9.617 +### 9.618 +### 9.619 +### 9.620 ### def add_pdb(self, pdb_code, pdb_chain, pdb_text, model): 9.621 -### 9.622 +### 9.623 ### """ 9.624 ### Method read PDB and get coordinates of CA atoms 9.625 -### 9.626 +### 9.627 ### Arguments: 9.628 ### pdb_code - PDB name, for example "1jcc" 9.629 ### pdb_chain - chain, for example "A" 9.630 ### pdb_text - PDB file strings 9.631 -### 9.632 -### 9.633 +### 9.634 +### 9.635 ### Result: 9.636 ### New element will be added in dict self.structure (key - string like "1jcc:A") 9.637 
### This element is dict with [x,y,z] coordinates of each aa 9.638 -### 9.639 +### 9.640 ### """ 9.641 -### 9.642 +### 9.643 ### ID = self.to_ID(pdb_code, pdb_chain, model) 9.644 -### 9.645 +### 9.646 ### self.structure[ID], self.seq[ID] = read_pdb(pdb_code, pdb_chain, pdb_text, model) 9.647 -### 9.648 -### 9.649 -### 9.650 +### 9.651 +### 9.652 +### 9.653 9.654 9.655 - 9.656 9.657 9.658 9.659 @@ -689,8 +688,9 @@ 9.660 9.661 9.662 9.663 - 9.664 - 9.665 + 9.666 + 9.667 + 9.668 9.669 9.670 9.671 @@ -730,10 +730,10 @@ 9.672 9.673 9.674 # At first, calculate conformity without gapes 9.675 - 9.676 + 9.677 connections0 = [] # key - seq_part position, value - position in seq_part without gapes 9.678 part_usage = 0 # how many aa from seqpart were used 9.679 - 9.680 + 9.681 for i in xrange(0, len(seq_part)): 9.682 s = seq_part[i] 9.683 if s == '-': 9.684 @@ -741,15 +741,15 @@ 9.685 else: 9.686 connections0.append(part_usage) 9.687 part_usage += 1 9.688 - 9.689 - 9.690 9.691 9.692 9.693 9.694 9.695 9.696 - 9.697 + 9.698 + 9.699 + 9.700 9.701 connections1 = [] 9.702 9.703 @@ -769,7 +769,7 @@ 9.704 connections1.append('-') 9.705 # print 'Can not find', AA_part, len(connections1), 'of', seq_part, 'in', seq_all 9.706 continue 9.707 - 9.708 + 9.709 9.710 connections1.append(all_usage - 1) 9.711 9.712 @@ -788,10 +788,10 @@ 9.713 9.714 9.715 9.716 - 9.717 - 9.718 - 9.719 - 9.720 + 9.721 + 9.722 + 9.723 + 9.724 9.725 9.726 9.727 @@ -805,51 +805,51 @@ 9.728 9.729 """ 9.730 Calculates distances 9.731 - 9.732 + 9.733 adds new elements to self.d 9.734 """ 9.735 9.736 9.737 # Atom count 9.738 aton_count = len(self.structure1.values()[0]) 9.739 - 9.740 + 9.741 for ID, structure in self.structure1.items(): 9.742 - 9.743 + 9.744 if self.d.has_key(ID): 9.745 continue # already done 9.746 - 9.747 + 9.748 self.d[ID] = [] 9.749 9.750 9.751 - # create distance matrix 9.752 + # create distance matrix 9.753 for atom1 in xrange(0, aton_count): 9.754 self.d[ID].append([]) 9.755 for atom2 in xrange(0, aton_count): 9.756 self.d[ID][atom1].append(None) 9.757 9.758 9.759 - # done distance matrix 9.760 - 9.761 + # done distance matrix 9.762 + 9.763 for atom1 in xrange(0, aton_count): 9.764 for atom2 in xrange(0, aton_count): 9.765 9.766 if atom2 == atom1: 9.767 break 9.768 - 9.769 + 9.770 if structure[atom1] and structure[atom2]: 9.771 9.772 dist = 0 9.773 9.774 for xyz in xrange(0, 3): 9.775 - 9.776 + 9.777 dist += (structure[atom1][xyz] - structure[atom2][xyz]) ** 2 9.778 9.779 dist = dist ** 0.5 9.780 9.781 self.d[ID][atom1][atom2] = dist 9.782 self.d[ID][atom2][atom1] = dist 9.783 - 9.784 - 9.785 + 9.786 + 9.787 9.788 9.789 9.790 @@ -865,12 +865,12 @@ 9.791 9.792 Arguments: 9.793 IDs - list of IDs to study 9.794 - 9.795 - 9.796 + 9.797 + 9.798 Result: 9.799 returns [alignment core graph, cost graf] 9.800 9.801 - 9.802 + 9.803 alignment core graph example: 9.804 graf[0][1] = 1 0 and 1 positions are connected 9.805 graf[0][1] = 0 0 and 1 positions are not connected 9.806 @@ -882,19 +882,19 @@ 9.807 9.808 graf = {} 9.809 9.810 - cost = {} # distance spreading 9.811 + cost = {} # distance spreading 9.812 9.813 - 9.814 - 9.815 - 9.816 - 9.817 + 9.818 + 9.819 + 9.820 + 9.821 for atom1 in xrange(0, aton_count): 9.822 9.823 - 9.824 + 9.825 graf[atom1] = {} 9.826 9.827 cost[atom1] = {} 9.828 - 9.829 + 9.830 for atom2 in xrange(0, aton_count): 9.831 9.832 9.833 @@ -902,26 +902,26 @@ 9.834 graf[atom1][atom2] = 1 9.835 break 9.836 9.837 - 9.838 + 9.839 distances = [] 9.840 - 9.841 + 9.842 for ID in IDs: 9.843 
distances.append(self.d[ID][atom1][atom2]) 9.844 9.845 - 9.846 + 9.847 if distances.count(None): 9.848 graf[atom1][atom2] = 0 9.849 graf[atom2][atom1] = 0 9.850 9.851 else: 9.852 - 9.853 + 9.854 spreading = max(distances) - min(distances) 9.855 9.856 if spreading > self.delta: 9.857 - 9.858 + 9.859 graf[atom1][atom2] = 0 9.860 graf[atom2][atom1] = 0 9.861 - 9.862 + 9.863 else: 9.864 graf[atom1][atom2] = 1 9.865 graf[atom2][atom1] = 1 9.866 @@ -929,7 +929,7 @@ 9.867 cost[atom1][atom2] = -spreading 9.868 cost[atom2][atom1] = -spreading 9.869 9.870 - 9.871 + 9.872 9.873 return [graf, cost] 9.874 9.875 @@ -966,7 +966,7 @@ 9.876 if self.alternative_core_new_atoms: 9.877 9.878 # compare this core with all added cores 9.879 - 9.880 + 9.881 for one_core in new_cores: 9.882 new_atoms = 0 9.883 9.884 @@ -980,15 +980,12 @@ 9.885 9.886 9.887 # if this core is good 9.888 - if ok == 1: 9.889 + if ok == 1: 9.890 new_cores.append(core) 9.891 9.892 9.893 9.894 return new_cores 9.895 - 9.896 - 9.897 - 9.898 9.899 9.900 9.901 @@ -1006,3 +1003,6 @@ 9.902 9.903 9.904 9.905 + 9.906 + 9.907 +
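A note on the GeometricalCore_class.py hunks above: from_ID() accepts a structure identifier either as a PDB code ("1jcc", optionally followed by a chain and a model number) or as an *.ent/*.pdb file name, using the two compiled patterns re1 and re2, and caches each parsed (code, chain, model) tuple in from_ID_dict. A minimal usage sketch of the pdb-code pattern; the group numbers follow how parse[1], parse[3] and parse[5] are read out above, and the example identifier is made up:

    import re

    # Pattern copied from from_ID(); group 2 is the PDB code, group 4 the chain, group 6 the model.
    RE_PDB_CODE = re.compile(
        r"(^|[^a-z0-9])([0-9][0-9a-z]{3})([^a-z0-9]([0-9a-z ]?)([^a-z0-9]([0-9]{1,3}))?)?($|[^a-z0-9])")

    match = RE_PDB_CODE.search("1jcc:a/2")       # identifiers are lower-cased before matching
    code, chain, model = match.group(2), match.group(4), match.group(6)
    # -> code == "1jcc", chain == "a" (upper-cased by the caller), model == "2"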
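read_pdb() pulls alpha-carbon coordinates and a one-letter sequence straight out of the fixed-width ATOM records, keeping only the requested chain and skipping alternate locations other than blank or 'A'. Below is a stripped-down sketch of that column parsing; the MODEL/ENDMDL handling of the original is left out, and parse_ca_atoms / AA3TO1 are illustrative names, not part of the class:

    # Column slices follow the PDB ATOM record layout, as in read_pdb().
    AA3TO1 = {'ALA': 'A', 'GLY': 'G', 'LYS': 'K'}   # truncated stand-in for the AAdict mapping

    def parse_ca_atoms(pdb_lines, chain):
        """Return ([x, y, z] per CA atom, one-letter sequence) for the given chain."""
        coords, seq = [], ''
        for line in pdb_lines:
            if len(line) < 54 or not line.startswith('ATOM'):
                continue
            if line[12:16].strip() != 'CA':        # alpha-carbons only
                continue
            if line[21].strip() != chain:          # requested chain only
                continue
            if line[16] not in (' ', 'A'):         # skip alternate locations B, C, ...
                continue
            x, y, z = float(line[30:38]), float(line[38:46]), float(line[46:54])
            coords.append([x, y, z])
            seq += AA3TO1.get(line[17:20].strip(), 'x')
        return coords, seq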
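The geometrical core itself comes from two steps above: calc_d() fills self.d[ID][i][j] with the Euclidean CA-CA distance between block positions i and j inside each structure, and get_graf() then keeps an edge between i and j only if that distance varies by at most self.delta across all structures (positions missing from any structure are disconnected), recording minus the spread as an edge cost. A compact sketch of the same idea, with my own function and variable names:

    def core_graph(distance, ids, delta):
        """distance[ID][i][j]: CA-CA distance in structure ID, or None if an atom is missing.
        Returns (graph, cost): graph[i][j] == 1 when i and j stay rigid across all structures."""
        n = len(distance[ids[0]])
        graph = {i: {i: 1} for i in range(n)}
        cost = {i: {} for i in range(n)}
        for i in range(n):
            for j in range(i):
                d_ij = [distance[ID][i][j] for ID in ids]
                if None in d_ij:
                    graph[i][j] = graph[j][i] = 0
                    continue
                spread = max(d_ij) - min(d_ij)
                graph[i][j] = graph[j][i] = 1 if spread <= delta else 0
                cost[i][j] = cost[j][i] = -spread    # tighter pairs cost less
        return graph, cost

The resulting graph is what the Kliki class (next file) searches for maximal cliques; the largest clique is reported as the main core.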
10.1 --- a/blocks3d/Kliki.py Wed Dec 15 02:22:38 2010 +0300 10.2 +++ b/blocks3d/Kliki.py Wed Dec 15 18:30:19 2010 +0300 10.3 @@ -34,14 +34,14 @@ 10.4 10.5 compsub - ?????? ??????? ??? ??????? ???? 10.6 """ 10.7 - 10.8 10.9 - 10.10 10.11 10.12 10.13 10.14 - 10.15 + 10.16 + 10.17 + 10.18 def __init__ (self, graf, cost = None, limit_count=0, min_size=0, timeout=10): 10.19 10.20 """ 10.21 @@ -57,7 +57,7 @@ 10.22 ?????? ???? ?????? ????? ??????????. 10.23 ??? ?????? ????, ??? ???? ??????? ??? ????? 10.24 ???????????? ??? fast_algorithm ? ??? ?????????? ???? 10.25 - 10.26 + 10.27 10.28 limit_count - ???????????? ????? ????, ??????? ????? 10.29 ???? ?????? 0, ?? ??????????? ??? ????? 10.30 @@ -65,15 +65,15 @@ 10.31 min_size - min size of returning klika 10.32 10.33 timeout - time in sec. for BRON-KERBOSH algorithm 10.34 - 10.35 + 10.36 """ 10.37 10.38 - 10.39 + 10.40 10.41 self.graf = graf 10.42 self.cost = cost 10.43 10.44 - 10.45 + 10.46 self.kliki = [] 10.47 10.48 self.timeout = timeout 10.49 @@ -95,40 +95,40 @@ 10.50 while deleted: 10.51 10.52 deleted = 0 10.53 - 10.54 + 10.55 for atom, c in connections.items(): 10.56 - 10.57 + 10.58 if c < min_size: 10.59 - 10.60 + 10.61 del connections[atom] 10.62 - 10.63 + 10.64 for atom1, connect in graf[atom].items(): 10.65 if connect == 1 and connections.has_key(atom1): 10.66 connections[atom1] -= 1 10.67 deleted = 1 10.68 10.69 - 10.70 - 10.71 - 10.72 + 10.73 + 10.74 + 10.75 10.76 bank_l = {} 10.77 - 10.78 + 10.79 for atom, c in connections.items(): 10.80 - 10.81 + 10.82 if not bank_l.has_key(c): 10.83 bank_l[c] = [] 10.84 - 10.85 + 10.86 bank_l[c].append(atom) 10.87 10.88 10.89 - keys = [] 10.90 - 10.91 + keys = [] 10.92 + 10.93 if len(bank_l.keys()): 10.94 for c in xrange(min(bank_l.keys()), max(bank_l.keys())+1): 10.95 if bank_l.has_key(c): 10.96 keys.extend(bank_l[c]) 10.97 - 10.98 - 10.99 + 10.100 + 10.101 10.102 10.103 # RUN BRON-KERBOSH 10.104 @@ -141,7 +141,7 @@ 10.105 self.fast_algorithm(keys[:]) # run fast algorithm 10.106 10.107 10.108 - 10.109 + 10.110 10.111 10.112 # ?????????? ?????????? ????? ?? ???????? ????? ????????? ? ??? 10.113 @@ -154,7 +154,7 @@ 10.114 ## min_l=len(self.kliki[0]) # ??????????? ?????? 10.115 10.116 bank_l = {} 10.117 - 10.118 + 10.119 for klika in self.kliki: 10.120 klika.sort() 10.121 l = len(klika) # ????? ??????? ????? 10.122 @@ -173,12 +173,12 @@ 10.123 kliki=[] 10.124 10.125 #print self.cost 10.126 - 10.127 + 10.128 if len(bank_l.keys()): 10.129 - 10.130 + 10.131 r = range(min(bank_l.keys()), max(bank_l.keys())+1) 10.132 r.reverse() 10.133 - 10.134 + 10.135 for l in r: 10.136 if (bank_l.has_key(l)): 10.137 10.138 @@ -191,7 +191,7 @@ 10.139 10.140 # ??????????? ?? ?? ???????? ????? ???? cost 10.141 10.142 - 10.143 + 10.144 10.145 costs = [] 10.146 10.147 @@ -200,7 +200,7 @@ 10.148 c = 0 10.149 10.150 for i in klika: 10.151 - 10.152 + 10.153 if not self.cost.has_key(i): 10.154 continue 10.155 10.156 @@ -209,7 +209,7 @@ 10.157 if j == i: 10.158 break 10.159 10.160 - 10.161 + 10.162 if not self.cost[i].has_key(j): 10.163 continue 10.164 10.165 @@ -233,15 +233,15 @@ 10.166 del costs[n] 10.167 10.168 k = k1 10.169 - 10.170 + 10.171 kliki.extend(k) 10.172 - 10.173 + 10.174 ## kliki.reverse() 10.175 10.176 if limit_count: 10.177 if len(kliki) > limit_count: # ??????? ??????????? ?? ????? ???? 
10.178 kliki = kliki[:limit_count] 10.179 - 10.180 + 10.181 10.182 self.kliki = kliki[:] 10.183 10.184 @@ -249,10 +249,10 @@ 10.185 10.186 10.187 10.188 - 10.189 - 10.190 10.191 - 10.192 + 10.193 + 10.194 + 10.195 10.196 def bron_kerbosh (self, keys): 10.197 10.198 @@ -273,9 +273,9 @@ 10.199 10.200 10.201 print 'Bron and Kerbosh algorithm started' 10.202 - 10.203 + 10.204 start_time = time.time() 10.205 - 10.206 + 10.207 # ????... 10.208 while 1: 10.209 10.210 @@ -284,14 +284,14 @@ 10.211 10.212 10.213 10.214 - 10.215 + 10.216 10.217 # ???????? candidates ? used ?? ?????? 10.218 10.219 #print depth 10.220 - 10.221 + 10.222 candidates = list_candidates[depth][:] 10.223 - used = list_used[depth][:] 10.224 + used = list_used[depth][:] 10.225 10.226 10.227 10.228 @@ -301,25 +301,25 @@ 10.229 depth -= 1 10.230 10.231 if compsub: 10.232 - compsub.pop() 10.233 + compsub.pop() 10.234 continue 10.235 10.236 - 10.237 10.238 10.239 - 10.240 - 10.241 + 10.242 + 10.243 + 10.244 # ? used ?? ???????? ???????, ??????????? ?? ????? ????????? ?? candidates 10.245 # (??? ?? used ?? ????????? ???? ?? ? 1 ?? candidates) 10.246 10.247 used_candidates = 0 10.248 - 10.249 + 10.250 for used1 in used: 10.251 for candidates1 in candidates: 10.252 if self.graf[used1][candidates1] == 0: 10.253 break 10.254 else: 10.255 - used_candidates = 1 10.256 + used_candidates = 1 10.257 10.258 if used_candidates: 10.259 depth -= 1 10.260 @@ -327,8 +327,8 @@ 10.261 if compsub: 10.262 compsub.pop() 10.263 continue 10.264 - 10.265 - 10.266 + 10.267 + 10.268 10.269 10.270 10.271 @@ -341,7 +341,7 @@ 10.272 10.273 10.274 10.275 - 10.276 + 10.277 # ????????? new_candidates ? new_used, ?????? ?? candidates ? used ???????, ?? ?????????? ? v 10.278 # (?? ????, ???????? ?????? ?????????? ? v) 10.279 new_candidates = [] 10.280 @@ -349,7 +349,7 @@ 10.281 if self.graf[candidates1][v] == 1 and candidates1 != v: 10.282 new_candidates.append(candidates1) 10.283 10.284 - 10.285 + 10.286 new_used = [] 10.287 for used1 in used: 10.288 if self.graf[used1][v] == 1 and used1 != v: 10.289 @@ -358,7 +358,7 @@ 10.290 10.291 10.292 10.293 - # ??????? v ?? candidates ? ???????? ? used 10.294 + # ??????? v ?? candidates ? ???????? ? used 10.295 del list_candidates[depth][0] 10.296 list_used[depth].append(v) 10.297 10.298 @@ -367,33 +367,33 @@ 10.299 if len(new_candidates) == 0 and len(new_used) == 0: 10.300 # compsub ? ????? 10.301 self.kliki.append(compsub[:]) 10.302 - 10.303 + 10.304 else: 10.305 # ????? ?????????? ???????? 
bron_kerbosh(new_candidates, new_used) 10.306 10.307 depth += 1 10.308 - 10.309 + 10.310 10.311 # TIMEOUT check start 10.312 if self.timeout != -1: 10.313 - 10.314 + 10.315 if time.time() - start_time > self.timeout: 10.316 10.317 self.kliki = [] 10.318 return 10.319 # TIMEOUT check end 10.320 10.321 - 10.322 - 10.323 - 10.324 + 10.325 + 10.326 + 10.327 if depth >= len(list_candidates): 10.328 list_candidates.append([]) 10.329 list_used.append([]) 10.330 10.331 - 10.332 + 10.333 list_candidates[depth] = new_candidates[:] 10.334 list_used[depth] = new_used[:] 10.335 - 10.336 + 10.337 continue 10.338 10.339 10.340 @@ -442,7 +442,7 @@ 10.341 10.342 if not excluded.has_key(i): 10.343 keys1.append(i) 10.344 - 10.345 + 10.346 if len(keys1) == 0: 10.347 break 10.348 10.349 @@ -454,9 +454,9 @@ 10.350 connections = {} # index - atom, value - connections value 10.351 10.352 for i in keys1: 10.353 - 10.354 + 10.355 connections[i] = 0 10.356 - 10.357 + 10.358 for j in keys1: 10.359 10.360 if i != j and self.graf[i][j]: 10.361 @@ -466,8 +466,8 @@ 10.362 if max(connections.values()) == min(connections.values()): 10.363 # all atoms are equal 10.364 break 10.365 - 10.366 - 10.367 + 10.368 + 10.369 exclude_connect = min(connections.values()) # excluded atoms connections 10.370 10.371 10.372 @@ -479,7 +479,7 @@ 10.373 for i in keys1: 10.374 10.375 cost_sum[i] = 0 10.376 - 10.377 + 10.378 if connections[i] == exclude_connect: 10.379 10.380 for j in keys1: 10.381 @@ -495,13 +495,13 @@ 10.382 keys2 = [] 10.383 10.384 for i in keys1: 10.385 - 10.386 + 10.387 if connections[i] == exclude_connect: 10.388 10.389 if cost_sum[i] == exclude_cost: 10.390 10.391 continue 10.392 - 10.393 + 10.394 keys2.append(i) 10.395 10.396 keys1 = clon(keys2) 10.397 @@ -511,11 +511,11 @@ 10.398 keys2 = [] 10.399 10.400 for i in keys1: 10.401 - 10.402 + 10.403 if connections[i] == exclude_connect: 10.404 10.405 continue 10.406 - 10.407 + 10.408 keys2.append(i) 10.409 10.410 keys1 = clon(keys2) 10.411 @@ -546,22 +546,22 @@ 10.412 10.413 10.414 break 10.415 - 10.416 - 10.417 + 10.418 + 10.419 else: 10.420 # no new atoms 10.421 break 10.422 - 10.423 - 10.424 + 10.425 + 10.426 10.427 # keys1 is klika 10.428 10.429 self.kliki.append(keys1[:]) 10.430 10.431 - 10.432 10.433 - 10.434 - 10.435 - 10.436 10.437 - 10.438 + 10.439 + 10.440 + 10.441 + 10.442 +
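Kliki.py implements Bron–Kerbosch iteratively, with explicit candidate/used stacks, an overall timeout, and a greedy fast_algorithm fallback for graphs that are too large (the runs of '?' in its comments are Cyrillic text lost to encoding). For orientation, here is a hedged sketch of the textbook recursive form of the same algorithm, without the pruning and timeout of the original; all names are mine:

    def bron_kerbosch(graph, clique=None, candidates=None, used=None, cliques=None):
        """graph[u][v] == 1 when u and v are connected; appends every maximal clique to cliques."""
        if clique is None:
            clique, used, cliques = [], [], []
            candidates = list(graph)
        if not candidates and not used:
            cliques.append(clique[:])            # nothing can extend the clique: it is maximal
            return cliques
        for v in list(candidates):
            new_candidates = [u for u in candidates if u != v and graph[v][u]]
            new_used = [u for u in used if u != v and graph[v][u]]
            bron_kerbosch(graph, clique + [v], new_candidates, new_used, cliques)
            candidates.remove(v)                 # v is now fully explored
            used.append(v)
        return cliques

Run on the graph produced by get_graf() (or the core_graph sketch above), max(bron_kerbosch(graph), key=len) would give the main core, much as check_block() obtains it via Kliki.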
11.1 --- a/blocks3d/clon.py Wed Dec 15 02:22:38 2010 +0300 11.2 +++ b/blocks3d/clon.py Wed Dec 15 18:30:19 2010 +0300 11.3 @@ -10,9 +10,9 @@ 11.4 11.5 11.6 if t == list or t == tuple: 11.7 - 11.8 + 11.9 r = [] 11.10 - 11.11 + 11.12 for i in obj: 11.13 r.append(clon(i)) 11.14 11.15 @@ -23,9 +23,9 @@ 11.16 11.17 11.18 if t == dict: 11.19 - 11.20 + 11.21 r = {} 11.22 - 11.23 + 11.24 for key, value in obj.items(): 11.25 r[key] = clon(value) 11.26 11.27 @@ -34,5 +34,5 @@ 11.28 11.29 return obj 11.30 11.31 - 11.32 - 11.33 + 11.34 +
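clon.py is a hand-rolled deep copy: lists and tuples come back as new lists, dicts as new dicts, and anything else is returned unchanged; Kliki.fast_algorithm uses it to snapshot its key lists. An equivalent idiomatic sketch (the standard library's copy.deepcopy covers the same ground, except that it preserves tuples as tuples):

    def clon(obj):
        """Recursively copy nested lists/tuples/dicts; other objects are returned as-is."""
        if isinstance(obj, (list, tuple)):
            return [clon(item) for item in obj]
        if isinstance(obj, dict):
            return {key: clon(value) for key, value in obj.items()}
        return obj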
12.1 --- a/pytale/dummy_pytale.py Wed Dec 15 02:22:38 2010 +0300 12.2 +++ b/pytale/dummy_pytale.py Wed Dec 15 18:30:19 2010 +0300 12.3 @@ -11,7 +11,7 @@ 12.4 1) MenuBar 12.5 2) Resizable panel with ListBox containing sequence names 12.6 3) Panel with RichTextBox containing the alignment itself 12.7 - 4) StatusBar 12.8 + 4) StatusBar 12.9 """ 12.10 def __init__(self, parent, title, size=(1000, 600)): 12.11 global settings 12.12 @@ -140,7 +140,7 @@ 12.13 end = self.text.XYToPosition(column+1, line) 12.14 self.text.SetStyle(start, end, self.settings.conservation_styles[conser]) 12.15 print 'colouring done' 12.16 - 12.17 + 12.18 12.19 class Settings(object): 12.20 def __init__(self):
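dummy_pytale.py colours its alignment view one character at a time: XYToPosition() turns a (column, line) pair into a character offset, and SetStyle() applies the wx.TextAttr chosen for that column's conservation class. A minimal sketch of that pattern; the widget, styles dict and conservation codes are placeholders, not the real Settings object:

    import wx

    def colour_alignment(text_ctrl, conservation_per_column, styles, n_lines):
        """Apply styles[conser] to every character of each column, as the colouring loop above does."""
        for column, conser in enumerate(conservation_per_column):
            attr = styles[conser]                        # a wx.TextAttr per conservation class
            for line in range(n_lines):
                start = text_ctrl.XYToPosition(column, line)
                text_ctrl.SetStyle(start, start + 1, attr)   # start+1 == XYToPosition(column+1, line)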
13.1 --- a/repeats/repeat_joiner.py Wed Dec 15 02:22:38 2010 +0300 13.2 +++ b/repeats/repeat_joiner.py Wed Dec 15 18:30:19 2010 +0300 13.3 @@ -40,7 +40,7 @@ 13.4 group_ori -- if (position of real_start() of this interval in the group) 13.5 < (position of real_end()) 13.6 """ 13.7 - 13.8 + 13.9 def __init__(self, repeat_joiner, chromosome_name, pos_from, pos_to, ori=True): 13.10 repeat_joiner._intervals.append(self) 13.11 self.start = pos_from 13.12 @@ -52,13 +52,13 @@ 13.13 repeat_joiner[chromosome_name] = Chromosome(chromosome_name) 13.14 self.chromosome = repeat_joiner[chromosome_name] 13.15 repeat_joiner[chromosome_name].insert_interval(self) 13.16 - 13.17 + 13.18 @staticmethod 13.19 def pair(interval_1, interval_2): 13.20 """ connect these interval with thick edge """ 13.21 interval_1.sib = interval_2 13.22 interval_2.sib = interval_1 13.23 - 13.24 + 13.25 def __eq__(self, other): 13.26 return self.chromosome == other.chromosome and \ 13.27 self.start == other.start and \ 13.28 @@ -67,7 +67,7 @@ 13.29 13.30 def __ne__(self, other): 13.31 return not (self == other) 13.32 - 13.33 + 13.34 def tuple(self): 13.35 return (self.start, self.end, self.ori, self.chromosome) 13.36 13.37 @@ -92,7 +92,7 @@ 13.38 def edges(self, min_intersection=config.min_intersection): 13.39 """ visit thick edge at first and then all thin edges """ 13.40 return [self.sib] + self.thin_edges(min_intersection) 13.41 - 13.42 + 13.43 @property 13.44 def real_start(self): 13.45 """ real start of interval (depends on ori) """ 13.46 @@ -108,7 +108,7 @@ 13.47 return self.end 13.48 else: 13.49 return self.start - 1 13.50 - 13.51 + 13.52 def __len__(self): 13.53 """ length of interval """ 13.54 return self.end - self.start 13.55 @@ -145,7 +145,7 @@ 13.56 else: 13.57 chromosome_ori = other.ori == other.group_ori 13.58 self.group_ori = self.ori == chromosome_ori 13.59 - delta = self.real_start - other.real_start 13.60 + delta = self.real_start - other.real_start 13.61 if chromosome_ori: 13.62 self.group_real_start = other.group_real_start + delta 13.63 else: 13.64 @@ -171,20 +171,20 @@ 13.65 13.66 class IntervalGroup(list): 13.67 """ list of intervals """ 13.68 - 13.69 + 13.70 def __init__(self): 13.71 self.start = 0 13.72 13.73 def calc_start(self): 13.74 """ shift all grou coordinates to make min position zero """ 13.75 - offset = -min(min(interval.group_real_start, interval.group_real_end) 13.76 + offset = -min(min(interval.group_real_start, interval.group_real_end) 13.77 for interval in self) 13.78 for interval in self: 13.79 interval.group_real_start += offset 13.80 13.81 class RepeatJoiner(dict): 13.82 - """ dictionary {<chromosome name>: chromosome} 13.83 - 13.84 + """ dictionary {<chromosome name>: chromosome} 13.85 + 13.86 Data: 13.87 interval_groups -- list of interval_groups 13.88 _intervals = [] 13.89 @@ -200,7 +200,7 @@ 13.90 2 13.91 >>> print rj.interval_groups 13.92 [[1: [10(0), 100(90)), 1: [50(0), 150(100))]] 13.93 - 13.94 + 13.95 >>> rj = RepeatJoiner() 13.96 >>> Interval.pair(Interval(rj, '1', 10, 100), Interval(rj, '1', 110, 200)) 13.97 >>> Interval.pair(Interval(rj, '1', 30, 120), Interval(rj, '1', 7110, 7200)) 13.98 @@ -249,7 +249,7 @@ 13.99 r = q.get() 13.100 if r not in used: 13.101 used.add(r) 13.102 - interval_group.append(r) 13.103 + interval_group.append(r) 13.104 for r1 in r.edges(): 13.105 if r1 not in used: 13.106 q.put(r1)
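repeat_joiner.py builds repeat groups as connected components: each interval is linked to its pair by a thick edge (sib) and to overlapping intervals on the same chromosome by thin edges, and the loop at the bottom walks those edges breadth-first, collecting everything reachable into one IntervalGroup. A sketch of that traversal using the Interval API shown above (deque stands in for the Queue used in the original):

    from collections import deque

    def connected_groups(intervals):
        """Split intervals into connected components over the edges() relation."""
        used = set()
        groups = []
        for start in intervals:
            if start in used:
                continue
            group, queue = [], deque([start])
            used.add(start)
            while queue:
                interval = queue.popleft()
                group.append(interval)
                for neighbour in interval.edges():
                    if neighbour not in used:
                        used.add(neighbour)
                        queue.append(neighbour)
            groups.append(group)
        return groups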
14.1 --- a/repeats/repeats.py Wed Dec 15 02:22:38 2010 +0300 14.2 +++ b/repeats/repeats.py Wed Dec 15 18:30:19 2010 +0300 14.3 @@ -18,7 +18,7 @@ 14.4 14.5 p = argparse.ArgumentParser( 14.6 description='Repeats joining tool', 14.7 -formatter_class=argparse.ArgumentDefaultsHelpFormatter, 14.8 +formatter_class=argparse.ArgumentDefaultsHelpFormatter, 14.9 #~ argument_default=argparse.SUPPRESS, 14.10 ) 14.11 14.12 @@ -48,12 +48,12 @@ 14.13 repeat_copies_pos.append((pos_from, pos_to)) 14.14 repeat_copies = [] 14.15 for pos_from, pos_to in repeat_copies_pos: 14.16 - seq = Sequence.file_slice(p.I, pos_from, pos_to, p.r 14.17 - repeat_copies.append(seq) 14.18 + seq = Sequence.file_slice(p.I, pos_from, pos_to, p.r 14.19 + repeat_copies.append(seq) 14.20 alignment = Alignment.from_sequences(*repeat_copies) 14.21 alignment.muscle_align() 14.22 alignment.save_fasta(p.o) 14.23 - 14.24 + 14.25 except Exception, t: 14.26 print t 14.27 exit()
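repeats.py wires the pieces together: it parses options with argparse, collects the (from, to) positions of the repeat copies, slices each copy out of the input sequence with Sequence.file_slice, and hands the copies to MUSCLE through Alignment before writing FASTA. A sketch of that flow; the file_slice argument list is read off the (truncated) call above, so treat the signature as approximate, and Sequence/Alignment are the allpy classes imported elsewhere in repeats.py:

    def align_repeat_copies(sequence_file, positions, slice_arg, output_file):
        """positions is a list of (pos_from, pos_to) pairs; slice_arg corresponds to p.r above,
        whose meaning is not visible in this hunk."""
        copies = [Sequence.file_slice(sequence_file, pos_from, pos_to, slice_arg)
                  for pos_from, pos_to in positions]
        alignment = Alignment.from_sequences(*copies)
        alignment.muscle_align()                 # external MUSCLE run wrapped by allpy
        alignment.save_fasta(output_file)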
15.1 --- a/repeats/test.py Wed Dec 15 02:22:38 2010 +0300 15.2 +++ b/repeats/test.py Wed Dec 15 18:30:19 2010 +0300 15.3 @@ -17,7 +17,7 @@ 15.4 to1 = int(to1) + 1 15.5 from2 = int(from2) 15.6 to2 = int(to2) + 1 15.7 - 15.8 + 15.9 r1 = Interval(rj, c1, from1, to1, ori1) 15.10 r2 = Interval(rj, c2, from2, to2, ori2) 15.11 Interval.pair(r1, r2) 15.12 @@ -34,5 +34,5 @@ 15.13 if interval.tuple() in prev: 15.14 continue 15.15 prev.add(interval.tuple()) 15.16 - print "%i\t%s" % (i, str(interval).replace(' ', '\t')) 15.17 + print "%i\t%s" % (i, str(interval).replace(' ', '\t')) 15.18
16.1 --- a/sandbox/bufferedcanvas.py Wed Dec 15 02:22:38 2010 +0300 16.2 +++ b/sandbox/bufferedcanvas.py Wed Dec 15 18:30:19 2010 +0300 16.3 @@ -23,9 +23,9 @@ 16.4 published by the Free Software Foundation; either version 2.1 of the 16.5 License, or (at your option) any later version. 16.6 16.7 -As a special exception, the copyright holders of this library 16.8 +As a special exception, the copyright holders of this library 16.9 hereby recind Section 3 of the GNU Lesser General Public License. This 16.10 -means that you MAY NOT apply the terms of the ordinary GNU General 16.11 +means that you MAY NOT apply the terms of the ordinary GNU General 16.12 Public License instead of this License to any given copy of the 16.13 Library. This has been done to prevent users of the Library from being 16.14 denied access or the ability to use future improvements.
17.1 --- a/sandbox/gtk-text.py Wed Dec 15 02:22:38 2010 +0300 17.2 +++ b/sandbox/gtk-text.py Wed Dec 15 18:30:19 2010 +0300 17.3 @@ -14,7 +14,7 @@ 17.4 17.5 def __init__(self): 17.6 window = gtk.Window(gtk.WINDOW_TOPLEVEL) 17.7 - window.set_resizable(True) 17.8 + window.set_resizable(True) 17.9 window.connect("destroy", self.close_application) 17.10 17.11 sw = gtk.ScrolledWindow() 17.12 @@ -95,7 +95,7 @@ 17.13 17.14 def main(): 17.15 gtk.main() 17.16 - return 0 17.17 + return 0 17.18 17.19 if __name__ == "__main__": 17.20 TextViewExample()
18.1 --- a/sandbox/tk-text.py Wed Dec 15 02:22:38 2010 +0300 18.2 +++ b/sandbox/tk-text.py Wed Dec 15 18:30:19 2010 +0300 18.3 @@ -26,7 +26,7 @@ 18.4 # for i in xrange(len(body)): 18.5 # t.insert('end', body[i], 'c%d' % ids[i]) 18.6 # t.insert('end', '\n') 18.7 -# 18.8 +# 18.9 # for i in xrange(11): 18.10 # c = i * 255 // 10 18.11 # t.tag_configure('c%d' % i, background='#%02x%02x%02x' % (c,c,c)) 18.12 @@ -39,10 +39,10 @@ 18.13 # t.insert('end', body[i], 'l%dc%d' % (y, ids[i])) 18.14 # t.insert('end', '\n') 18.15 # root.update() 18.16 -# 18.17 +# 18.18 # print "text created" 18.19 # root.update() 18.20 -# 18.21 +# 18.22 # for y in xrange(len(seqs)): 18.23 # for i in xrange(11): 18.24 # c = i * 255 // 10
19.1 --- a/sandbox/ttk.py Wed Dec 15 02:22:38 2010 +0300 19.2 +++ b/sandbox/ttk.py Wed Dec 15 18:30:19 2010 +0300 19.3 @@ -78,7 +78,7 @@ 19.4 global seqs 19.5 names.delete(0, 'end') 19.6 sequences.delete('1.0', 'end') 19.7 - 19.8 + 19.9 filename = filedialog.askopenfilename() 19.10 seqs = set() 19.11 for item in open(filename).read().split('\n>'):
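ttk.py loads sequences by splitting the whole file on '\n>', so each chunk is one FASTA record with its name on the first line. A self-contained sketch of that minimal FASTA reading; the function name and the dict return value are mine:

    def read_fasta(filename):
        """Minimal FASTA reader in the spirit of the '\n>' split above: {name: sequence}."""
        seqs = {}
        with open(filename) as handle:
            for item in handle.read().split('\n>'):
                item = item.lstrip('>')
                if not item.strip():
                    continue
                name, _, body = item.partition('\n')
                seqs[name.strip()] = body.replace('\n', '')
        return seqs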
20.1 --- a/sandbox/wx-textctrl.py Wed Dec 15 02:22:38 2010 +0300 20.2 +++ b/sandbox/wx-textctrl.py Wed Dec 15 18:30:19 2010 +0300 20.3 @@ -31,7 +31,7 @@ 20.4 # for i in xrange(11): 20.5 # c = i * 255 // 10 20.6 # attrs.append(wx.TextAttr('black', '#%02x%02x%02x' % (c,c,c))) 20.7 -# 20.8 +# 20.9 # for name, body, ids, colors in seqs: 20.10 # for x in xrange(len(body)): 20.11 # text.SetDefaultStyle(attrs[ids[x]]) 20.12 @@ -40,7 +40,7 @@ 20.13 20.14 # ## v4: output text, then colorize 20.15 # text.WriteText("\n".join(body for name, body, ids, colors in seqs)) 20.16 -# 20.17 +# 20.18 # i = 0 20.19 # text.Freeze() 20.20 # for name, body, ids, colors in seqs:
21.1 --- a/sec_str/sec_str.py Wed Dec 15 02:22:38 2010 +0300 21.2 +++ b/sec_str/sec_str.py Wed Dec 15 18:30:19 2010 +0300 21.3 @@ -16,7 +16,7 @@ 21.4 21.5 p = argparse.ArgumentParser( 21.6 description='Secondary structure mapping tool.', 21.7 -formatter_class=argparse.ArgumentDefaultsHelpFormatter, 21.8 +formatter_class=argparse.ArgumentDefaultsHelpFormatter, 21.9 #~ argument_default=argparse.SUPPRESS, 21.10 ) 21.11