Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/raw-rev/c4c772e3ce86
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 07:50:41 2012
Кодировка:

# HG changeset patch
# User Daniil Alexeyevsky
# Date 1339254944 -14400
# Node ID c4c772e3ce8679d555d2340e560c77ac91ce97e5
# Parent d9872ba42f15d631bce05684aafcd60885fab553
Added method `translated` to dna.Sequence to translate it into a protein sequence using a given genetic code

diff -r d9872ba42f15 -r c4c772e3ce86 allpy/data/genetic_code.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/allpy/data/genetic_code.py Sat Jun 09 19:15:44 2012 +0400
@@ -0,0 +1,40 @@
+"""Standard genetic code. (DNA/RNA -> protein translation tables).
+
+Genetic code is presented as a dictionary mapping triplets of one-letter
+nucleotide codes to amino-acid names.
+"""
+
+_genetic_code = {
+ 'Stop': ('TAA', 'TAG', 'TGA'),
+ 'Alanine': ('GCA', 'GCC', 'GCG', 'GCT'),
+ 'Cysteine': ('TGC', 'TGT'),
+ 'Aspartic acid': ('GAC', 'GAT'),
+ 'Glutamic acid': ('GAA', 'GAG'),
+ 'Phenylalanine': ('TTC', 'TTT'),
+ 'Glycine': ('GGA', 'GGC', 'GGG', 'GGT'),
+ 'Histidine': ('CAC', 'CAT'),
+ 'Isoleucine': ('ATA', 'ATC', 'ATT'),
+ 'Lysine': ('AAA', 'AAG'),
+ 'Leucine': ('CTA', 'CTC', 'CTG', 'CTT', 'TTA', 'TTG'),
+ 'Methionine': ('ATG',),
+ 'Asparagine': ('AAC', 'AAT'),
+ 'Proline': ('CCA', 'CCC', 'CCG', 'CCT'),
+ 'Glutamine': ('CAA', 'CAG'),
+ 'Arginine': ('AGA', 'AGG', 'CGA', 'CGC', 'CGG', 'CGT'),
+ 'Serine': ('AGC', 'AGT', 'TCA', 'TCC', 'TCG', 'TCT'),
+ 'Threonine': ('ACA', 'ACC', 'ACG', 'ACT'),
+ 'Valine': ('GTA', 'GTC', 'GTG', 'GTT'),
+ 'Tryptophan': ('TGG',),
+ 'Tyrosine': ('TAC', 'TAT'),
+}
+
+standard_dna_code = dict((triplet, aa)
+ for aa, triplets in _genetic_code.items()
+ for triplet in triplets
+)
+"""Genetic code table for direct DNA -> protein translation."""
+
+standard_rna_code = dict((triplet.replace("T", "U"), aa)
+ for triplet, aa in standard_dna_code.items()
+)
+"""Genetic code table for direct RNA -> protein translation."""
diff -r d9872ba42f15 -r c4c772e3ce86 allpy/dna.py
--- a/allpy/dna.py Sun Jun 03 23:55:00 2012 +0400
+++ b/allpy/dna.py Sat Jun 09 19:15:44 2012 +0400
@@ -1,7 +1,9 @@
import base
import data.codes
+from data.genetic_code import standard_dna_code

import dna
+import protein

class Monomer(base.Monomer):
"""DNA monomers: nucleotides."""
@@ -30,6 +32,45 @@
result.append_monomer(complement.get(monomer.code1, 'N'))
return result

+ def translated(self, code=None, name=None, description=None, source=None):
+ """Return a new protein sequence translated from self.
+
+ `code` is a dict mapping triplets of DNA `code1`s to amino-acid `name`s.
+
+ If `code` is not specified, the standard genetic code is used.
+
+ If `code` is specified, it only needs to contain the changed codons.
+
+ The protein class used is `self.types.protein.Sequence`; you
+ are free to replace it at will.
+
+ Return a new protein sequence where, unless overridden by arguments:
+
+ * `name` is self.name with "_tr" appended
+ * `description` is self.description with " translated" appended
+ * `source` is the same as self.source
+ """
+ if code:
+ code, modification = dict(standard_dna_code), code
+ code.update(modification)
+ else:
+ code = standard_dna_code
+ result = self.types.protein.Sequence([],
+ name=name or self.name + "_tr",
+ description=description or self.description + " translated",
+ source=source or self.source
+ )
+ seen_stop = False
+ for a, b, c in zip(self[::3], self[1::3], self[2::3]):
+ assert not seen_stop, "Stop-codon must be the last one"
+ triplet = a.code1 + b.code1 + c.code1
+ aa_name = code[triplet]
+ if aa_name == "Stop":
+ seen_stop = True
+ continue
+ result.append_monomer(name=aa_name)
+ return result
+
class Column(base.Column):
types = dna

diff -r d9872ba42f15 -r c4c772e3ce86 test/test_dna.py
--- a/test/test_dna.py Sun Jun 03 23:55:00 2012 +0400
+++ b/test/test_dna.py Sat Jun 09 19:15:44 2012 +0400
@@ -1,3 +1,4 @@
+from nose.tools import raises
from allpy import dna

def test_dna():
@@ -5,3 +6,13 @@
s1 = s.reverse_complemented()
assert s1.name == "seq1'"
assert str(s1) == "GCTCTTCCGATCT"
+ assert str(s.translated()) == "RSEE"
+
+ del s[12:]
+ s += dna.Sequence.from_string("TGA")
+ assert str(s.translated()) == "RSEE"
+
+@raises(AssertionError)
+def test_translation_stop():
+ s = dna.Sequence.from_string("TGATCGGAAGAGC")
+ print s.translated()
diff -r d9872ba42f15 -r c4c772e3ce86 test/test_realign.py
--- a/test/test_realign.py Sun Jun 03 23:55:00 2012 +0400
+++ b/test/test_realign.py Sat Jun 09 19:15:44 2012 +0400
@@ -1,4 +1,3 @@
-from nose.tools import raises
from allpy import protein, processors

example1 = (protein.Alignment().