allpy
view allpy/base.py @ 262:e361a7b7d9aa
Moved contents of allpy.sequence into allpy.base
author | Daniil Alexeyevsky <me.dendik@gmail.com> |
---|---|
date | Tue, 14 Dec 2010 21:06:42 +0300 |
parents | d60628e29b24 |
children | e3783fca343e |
line source
16 """Class of monomer types.
18 Each MonomerType object represents a known monomer type, e.g. Valine,
19 and is referenced by each instance of monomer in a given sequence.
21 - `name`: full name of monomer type
22 - `code1`: one-letter code
23 - `code3`: three-letter code
24 - `is_modified`: either True or False
26 class attributes:
28 - `by_code1`: a mapping from one-letter code to MonomerType object
29 - `by_code3`: a mapping from three-letter code to MonomerType object
30 - `by_name`: a mapping from monomer name to MonomerType object
31 - `instance_type`: class of Monomer objects to use when creating new
32 objects; this must be redefined in descendent classes
34 All of the class attributes MUST be redefined when subclassing.
35 """
51 # We duplicate distinguished long names into MonomerType itself,
52 # so that we can use MonomerType.from_code3 to create the relevant
53 # type of monomer.
57 @classmethod
59 """Create all relevant instances of MonomerType.
61 `type_letter` is either of:
63 - 'p' for protein
64 - 'd' for DNA
65 - 'r' for RNA
67 `codes` is a table of monomer codes
68 """
73 @classmethod
75 """Return monomer type by one-letter code."""
78 @classmethod
80 """Return monomer type by three-letter code."""
83 @classmethod
85 """Return monomer type by name."""
89 """Create a new monomer of given type."""
98 """Monomer object.
100 attributes:
102 - `type`: type of monomer (a MonomerType object)
104 class attribute `monomer_type` is MonomerType or one of its subclasses,
105 it is used when creating new monomers. It MUST be redefined when subclassing Monomer.
106 """
112 @classmethod
116 @classmethod
120 @classmethod
130 """ Sequence of Monomers
132 list of monomer objects (aminoacids or nucleotides)
134 Mandatory data:
135 * name -- str with the name of sequence
136 * description -- str with description of the sequence
138 Optional (may be empty):
139 * source -- source of sequence
140 * pdb_chain -- Bio.PDB.Chain
141 * pdb_file -- file object
143 * pdb_residues -- {Monomer: Bio.PDB.Residue}
144 * pdb_secstr -- {Monomer: 'Secondary structure'}
145 Code Secondary structure
146 H alpha-helix
147 B Isolated beta-bridge residue
148 E Strand
149 G 3-10 helix
150 I pi-helix
151 T Turn
152 S Bend
153 - Other
156 ?TODO: global pdb_structures
157 """
173 """ Returns sequence in one-letter code """
177 """ Returns whether all corresponding monomers of these sequences are equal
179 If lengths of sequences are not equal, returns False
180 """
188 """ Reads Pdb chain from file
190 and align each Monomer with PDB.Residue (TODO)
191 """
208 """ Delete all pdb-connected links """
209 #~ gc.get_referrers(self.pdb_chains[0])
215 @staticmethod
217 """ Import data from one-letter code
219 monomer_kind is class, inherited from MonomerType
220 """
224 @staticmethod
226 """ Returns Sequence with Monomers with link to Bio.PDB.Residue
228 chain is Bio.PDB.Chain
229 """
245 """ Adds pdb information to each monomer
247 Returns whether the information has been successfully added
248 TODO: conformity_file
250 id-format lava flow
251 """
275 """ Saves pdb_chain to out_file """
289 """ Add secondary structure data """
311 @staticmethod
312 def file_slice(file, n_from, n_to, fasta_name='', name='', description='', monomer_kind=AminoAcidType):
313 """ Build and return sequence, consisting of part of sequence from file
315 Does not control gaps
316 """
331 """ Alignment
333 {<Sequence object>:[<Monomer object>,None,<Monomer object>]}
334 keys are the Sequence objects, values are the lists, which
335 contain monomers of those sequences or None for gaps in the
336 corresponding sequence of alignment
337 """
338 # _sequences -- list of Sequence objects. Sequences don't contain gaps
339 # - see sequence.py module
342 """overloaded constructor
344 Alignment() -> new empty Alignment
345 Alignment(sequences, body) -> new Alignment with sequences and
346 body initialized from arguments
347 Alignment(fasta_file) -> new Alignment, read body and sequences
348 from fasta file
350 """
361 """ Returns width, ie length of each sequence with gaps """
365 """ The number of sequences in alignment (its thickness). """
369 """ Calculate the identity of alignment positions for colouring.
371 For every (row, column) in alignment the percentage of the exactly
372 same residue in the same column in the alignment is calculated.
373 The data structure is just like the Alignment.body, but instead of
374 monomers it contains float percentages.
375 """
376 # Oh, God, that's awful! Absolutely not understandable.
377 # First, calculate percentages of amino acids in every column
393 # Second, map these percentages onto the alignment
407 @staticmethod
409 """ Import data from fasta file
411 monomer_kind is class, inherited from MonomerType
413 >>> import alignment
414 >>> sequences,body=alignment.Alignment.from_fasta(open("test.fasta"))
415 """
434 #if there is description
461 @staticmethod
463 """ Constructs new alignment from sequences
465 Adds None's to the right end to make the alignment sequences equal in length
466 """
476 """ Saves alignment to given file
478 Splits long lines to substrings of length=long_line
479 To prevent this, set long_line=None
480 """
484 """ Simply align this alignment using sequences (muscle)
486 uses old Monomers and Sequences objects
487 """
512 """ returns list of columns of alignment
514 sequence or sequences:
515 if sequence is given, then column is (original_monomer, monomer)
516 if sequences is given, then column is (original_monomer, {sequence: monomer})
517 if both of them are given, it is an error
518 original (Sequence type):
519 if given, this filters only columns represented by original sequence
520 """
536 """ Returns string representing secondary structure """
542 """ Block of alignment
544 Mandatory data:
545 * self.alignment -- alignment object, which the block belongs to
546 * self.sequences - set of sequence objects that contain monomers
547 and/or gaps, that constitute the block
548 * self.positions -- list of positions of the alignment.body that
549 are included in the block; position[i+1] is always to the right from position[i]
551 Don't change self.sequences -- it may be a link to other block.sequences
553 How to create a new block:
554 >>> import alignment
555 >>> import block
556 >>> proj = alignment.Alignment(open("test.fasta"))
557 >>> block1 = block.Block(proj)
558 """
561 """ Builds new block from alignment
563 if sequences==None, all sequences are used
564 if positions==None, all positions are used
565 """
575 """ Saves alignment to given file in fasta-format
577 No changes in the names, descriptions or order of the sequences
578 are made.
579 """
590 """ Returns length-sorted list of blocks, representing GCs
592 max_delta -- threshold of distance spreading
593 timeout -- Bron-Kerbosch timeout (then fast O(n ln n) algorithm)
594 minsize -- min size of each core
595 ac_new_atoms -- min part of new atoms in new alternative core
596 current GC is compared with each of already selected GCs
597 if difference is less than ac_new_atoms, current GC is skipped
598 difference = part of new atoms in current core
599 ac_count -- max number of cores (including main core)
600 -1 means infinity
601 If more than one pdb chain for some sequence provided, consider all of them
602 cost is calculated as 1 / (delta + 1)
603 delta in [0, +inf) => cost in (0, 1]
604 """
632 break
636 break
640 """ Returns string consisting of gap chars and chars x at self.positions
642 Length of returning string = length of alignment
643 """
650 """ Save xstring and name in fasta format """
654 """ Iterates monomers of this sequence from this block """
659 """ Iterates Ca-atom of monomers of this sequence from this block """
663 """ Iterates pairs (sequence, chain) """
670 """ Superimpose all pdb_chains in this block """
679 # Apply rotation/translation to the moving atoms
683 """ Save all sequences
685 Returns {(sequence, chain): CHAIN}
686 CHAIN is chain letter in new file
687 """
693 # TODO: read from tmp_file.name
694 # change CHAIN
695 # add to out_file
699 # vim: set ts=4 sts=4 sw=4 et: