Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/raw-rev/61b61f911a1a
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 07:49:41 2012
Кодировка:

# HG changeset patch
# User Daniil Alexeyevsky
# Date 1338660927 -14400
# Node ID 61b61f911a1a57e1520e50297a7a3a132262f4b4
# Parent 0aa68678861c3b6a9b57a4981e55bb4b23440f5e
Reorganized tests to make them more readable

diff -r 0aa68678861c -r 61b61f911a1a test/test_base.py
--- a/test/test_base.py Sat Jun 02 21:56:13 2012 +0400
+++ b/test/test_base.py Sat Jun 02 22:15:27 2012 +0400
@@ -1,22 +1,8 @@
-import re
-from StringIO import StringIO
-
-import allpy.base
-import allpy.protein as p
-from allpy import processors
-
-# XXX Totally missed:
-# XXX - monomer representation (should we test it at all?)
-# XXX - sequence comparison
-# XXX - sequence representation
-# XXX - Alignment.columns_as_lists
-# XXX - Alignment.flush (which is a backwards-compatibility wrapper)
-# XXX - Alignment.process (which is a backwards-compatibility alias)
-# XXX - Block.from_alignment with None in either of arguments
+from allpy import protein, base

def test_new_monomers():
"""Test creation of monomer objects"""
- s = allpy.base.Sequence()
+ s = base.Sequence()

try:
m = s.append_monomer(code1='A')
@@ -25,7 +11,7 @@
else:
assert False, "base.Seqeuence must not be constructible from code1"

- m = p.Sequence().append_monomer(code3='ALA')
+ m = protein.Sequence().append_monomer(code3='ALA')
assert m.__class__.__name__ == "Alanine"
assert m.code1 == "A"
assert m.code3 == "ALA"
@@ -48,123 +34,4 @@
m = s.append_monomer(code3='SEC')
assert m.name == "Selenocysteine"

-def assert_alignment(alignment, *body):
- """Helper: check if alignment representation matches strings of body"""
- s = StringIO()
- alignment.to_file(s)
- body = "".join([">\n%s\n" % string for string in body])
- assert s.getvalue() == body, "Expected:\n%s\nGot:\n%s" % (body, s.getvalue())
-
-def test_alignment_changes():
- """Test operations that modify alignment"""
-
- a = (p.Alignment().
- append_row_from_string("a-------cdef").
- append_row_from_string("ghiklmpq--").
- append_row_from_string("-------------rst"))
-
- assert_alignment(a,
- "A-------CDEF----",
- "GHIKLMPQ--------",
- "-------------RST",
- )
-
- a.realign(processors.Left())
- assert_alignment(a,
- "ACDEF-----------",
- "GHIKLMPQ--------",
- "RST-------------",
- )
-
- a.realign(processors.Center())
- assert_alignment(a,
- "-----ACDEF------",
- "----GHIKLMPQ----",
- "------RST-------",
- )
-
- a.realign(processors.Right())
- assert_alignment(a,
- "-----------ACDEF",
- "--------GHIKLMPQ",
- "-------------RST",
- )
-
-def test_stringio_conversions():
- a = (p.Alignment().
- append_row_from_string("a-c-d", name="Seq1").
- append_row_from_string("tgcga", name="Seq2"))
-
- o = StringIO()
- a.to_file(o, "fasta")
- got = o.getvalue()
- expect = ">Seq1\nA-C-D\n>Seq2\nTGCGA\n"
- assert expect == got, "Expected:\n%r\nGot:\n%r" % (expect, got)
-
- o = StringIO()
- a.to_file(o, "msf")
- got = o.getvalue()
- got = re.sub('(stdout MSF: 5 Type: N) .*', r'\1', got)
- expect = (
- "!!NA_MULTIPLE_ALIGNMENT 1.0\n"
- "\n"
- " stdout MSF: 5 Type: N 03/06/11 CompCheck: 1918 ..\n"
- "\n"
- " Name: Seq1 Len: 5 Check: 882 Weight: 1.00\n"
- " Name: Seq2 Len: 5 Check: 1036 Weight: 1.00\n"
- "\n"
- "//\n"
- "\n"
- " 1 5\n"
- "Seq1 A.C.D\n"
- "Seq2 TGCGA\n"
- "\n"
- )
- expect = re.sub('(stdout MSF: 5 Type: N) .*', r'\1', expect)
- assert expect == got, "Expected:\n%r\nGot:\n%r" % (expect, got)
-
- o.name = 'whoopie.msf'
- o.seek(0)
- b = p.Alignment().append_file(o, format='msf')
- assert b.rows_as_strings() == ["A-C-D", "TGCGA"]
- assert b.sequences[0].source == o.name
-
-def test_bio_io():
- """Test Bio python IO.
-
- BioPython does not support msf, so it is not covered in many other tests.
- """
- file = (
- "# STOCKHOLM 1.0\n"
- "#=GS seqA AC seqA123\n"
- "#=GS seqA DR PDB; 1abc ; 1-42;\n"
- "#=GS seqB AC seqB345\n"
- "SeqA SEQVENCEHELLO\n"
- "#=GR SeqA SS -HHHHH---CHHH\n"
- "SeqB SI-VENCE--LLO\n"
- "#=GR SeqB SS X-HHHHH--HHHH\n"
- "#=GC SS_cons X-HHHHH--HHHH\n"
- "#=GC seq_cons Si.VENCE.eLLo\n"
- "//\n"
- )
-
- o = StringIO()
- o.write(file)
- o.seek(0)
-
- aln = p.Alignment().append_file(o, "stockholm")
- assert len(aln.sequences) == 2
- assert len(aln.columns) == 13
- assert aln.sequences[1] not in aln.columns[2]
- assert aln.sequences[0][2].code1 == 'Q'
-
- o = StringIO()
- aln.to_file(o, format='stockholm')
- o.seek(0)
- for line in o:
- hd = line.strip().split()[0]
- assert hd in ('#', '#=GS', '#=GF', '#=GC', 'SeqA', 'SeqB', '//')
- o.seek(0)
- assert iter(o).next() == "# STOCKHOLM 1.0\n"
-
# vim: set et ts=4 sts=4 sw=4:
diff -r 0aa68678861c -r 61b61f911a1a test/test_io_biopython.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/test_io_biopython.py Sat Jun 02 22:15:27 2012 +0400
@@ -0,0 +1,42 @@
+from StringIO import StringIO
+from allpy import protein
+
+def test_bio_io():
+ """Test Bio python IO.
+
+ BioPython does not support msf, so it is not covered in many other tests.
+ """
+ file = (
+ "# STOCKHOLM 1.0\n"
+ "#=GS seqA AC seqA123\n"
+ "#=GS seqA DR PDB; 1abc ; 1-42;\n"
+ "#=GS seqB AC seqB345\n"
+ "SeqA SEQVENCEHELLO\n"
+ "#=GR SeqA SS -HHHHH---CHHH\n"
+ "SeqB SI-VENCE--LLO\n"
+ "#=GR SeqB SS X-HHHHH--HHHH\n"
+ "#=GC SS_cons X-HHHHH--HHHH\n"
+ "#=GC seq_cons Si.VENCE.eLLo\n"
+ "//\n"
+ )
+
+ o = StringIO()
+ o.write(file)
+ o.seek(0)
+
+ aln = protein.Alignment().append_file(o, "stockholm")
+ assert len(aln.sequences) == 2
+ assert len(aln.columns) == 13
+ assert aln.sequences[1] not in aln.columns[2]
+ assert aln.sequences[0][2].code1 == 'Q'
+
+ o = StringIO()
+ aln.to_file(o, format='stockholm')
+ o.seek(0)
+ for line in o:
+ hd = line.strip().split()[0]
+ assert hd in ('#', '#=GS', '#=GF', '#=GC', 'SeqA', 'SeqB', '//')
+ o.seek(0)
+ assert iter(o).next() == "# STOCKHOLM 1.0\n"
+
+# vim: set et ts=4 sts=4 sw=4:
diff -r 0aa68678861c -r 61b61f911a1a test/test_io_emboss.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/test_io_emboss.py Sat Jun 02 22:15:27 2012 +0400
@@ -0,0 +1,47 @@
+import re
+from StringIO import StringIO
+
+from allpy import protein
+
+def test_stringio_conversions():
+ # MSF is not supported by any other handler, except EMBOSS,
+ # so by testing MSF we essentially test EMBOSS
+ a = (protein.Alignment().
+ append_row_from_string("a-c-d", name="Seq1").
+ append_row_from_string("tgcga", name="Seq2"))
+
+ o = StringIO()
+ a.to_file(o, "fasta")
+ got = o.getvalue()
+ expect = ">Seq1\nA-C-D\n>Seq2\nTGCGA\n"
+ assert expect == got, "Expected:\n%r\nGot:\n%r" % (expect, got)
+
+ o = StringIO()
+ a.to_file(o, "msf")
+ got = o.getvalue()
+ got = re.sub('(stdout MSF: 5 Type: N) .*', r'\1', got)
+ expect = (
+ "!!NA_MULTIPLE_ALIGNMENT 1.0\n"
+ "\n"
+ " stdout MSF: 5 Type: N 03/06/11 CompCheck: 1918 ..\n"
+ "\n"
+ " Name: Seq1 Len: 5 Check: 882 Weight: 1.00\n"
+ " Name: Seq2 Len: 5 Check: 1036 Weight: 1.00\n"
+ "\n"
+ "//\n"
+ "\n"
+ " 1 5\n"
+ "Seq1 A.C.D\n"
+ "Seq2 TGCGA\n"
+ "\n"
+ )
+ expect = re.sub('(stdout MSF: 5 Type: N) .*', r'\1', expect)
+ assert expect == got, "Expected:\n%r\nGot:\n%r" % (expect, got)
+
+ o.name = 'whoopie.msf'
+ o.seek(0)
+ b = protein.Alignment().append_file(o, format='msf')
+ assert b.rows_as_strings() == ["A-C-D", "TGCGA"]
+ assert b.sequences[0].source == o.name
+
+ # vim: set et ts=4 sts=4 sw=4:
diff -r 0aa68678861c -r 61b61f911a1a test/test_realign.py
--- a/test/test_realign.py Sat Jun 02 21:56:13 2012 +0400
+++ b/test/test_realign.py Sat Jun 02 22:15:27 2012 +0400
@@ -1,17 +1,56 @@
from nose.tools import raises
from allpy import protein, processors

-example1 = protein.Alignment().\
- append_sequence(protein.Sequence.from_string("mkstf", name="abc")).\
- append_sequence(protein.Sequence.from_string("mstkfff", description="Longer sequence"))
+example1 = (protein.Alignment().
+ append_row_from_string("rrrgggfff---", name="abc").
+ append_row_from_string("---mmmrrrfff", name="xyz", description="Longer sequence"))
+
+def test_simple():
+ """Test primitive realigners: `Left`, `Right`, `Center`"""
+ example1.realign(processors.Left())
+ assert example1.rows_as_strings() == ["RRRGGGFFF---", "MMMRRRFFF---"]
+ example1.realign(processors.Right())
+ assert example1.rows_as_strings() == ["---RRRGGGFFF", "---MMMRRRFFF"]
+ example1.realign(processors.Center())
+ assert example1.rows_as_strings() == ["-RRRGGGFFF--", "-MMMRRRFFF--"]

def test_muscle():
+ """Test realignment with `muscle`"""
example1.realign(processors.Muscle())
+ assert example1.rows_as_strings() == ["---RRRGGGFFF", "MMMRRR---FFF"]

-example2 = protein.Alignment().\
- append_sequence(protein.Sequence.from_string("mkstf", name="a")).\
- append_sequence(protein.Sequence.from_string("mstkfff", name="a"))
+def test_needle():
+ """Test realignment with `needle`"""
+ example1.realign(processors.Needle())
+ assert example1.rows_as_strings() == ["---RRRGGGFFF", "MMMRRR---FFF"]
+
+def test_realign_block():
+ """Test what happens when we realign a block."""
+ example1.realign(processors.Muscle())
+ block = protein.Block.from_alignment(example1, columns=example1.columns[:9])
+ block.realign(processors.Center())
+ print example1.rows_as_strings()
+ assert example1.rows_as_strings() == ["-RRRGGG--FFF", "-MMMRRR--FFF"]
+
+example2 = (protein.Alignment().
+ append_row_from_string("rrrr", name="a").
+ append_row_from_string("rrrr", name="a"))

@raises(AssertionError)
-def test_muscle_failure():
+def test_muscle_duplicate():
+ """`muscle` must be unable to deal with alignments with non-unique ids"""
example2.realign(processors.Muscle())
+
+@raises(AssertionError)
+def test_needle_duplicate():
+ """`needle` must be unable to deal with alignments with non-unique ids"""
+ example2.realign(processors.Needle())
+
+example3 = (protein.Alignment().
+ append_row_from_string("n", name="xxx").
+ append_row_from_string("m", name=""))
+
+#@raises(AssertionError)
+#def test_needle_empty():
+# """`needle` must be unable to deal with sequences with empty name"""
+# example3.realign(processors.Needle())