| rev |
line source |
|
me@261
|
1 import sys |
|
me@261
|
2 |
|
me@315
|
3 import util |
|
me@284
|
4 import fasta |
|
me@260
|
5 import data.codes |
|
me@260
|
6 |
|
me@306
|
default_gaps = set(".-~")
"""Characters treated as gap symbols when an alignment is parsed."""
|
me@306
|
9 |
|
me@260
|
10 class MonomerType(object): |
|
me@260
|
11 """Class of monomer types. |
|
me@260
|
12 |
|
me@260
|
13 Each MonomerType object represents a known monomer type, e.g. Valine, |
|
me@260
|
14 and is referenced by each monomer instance in a given sequence. |
|
me@260
|
15 |
|
me@260
|
16 - `name`: full name of monomer type |
|
me@260
|
17 - `code1`: one-letter code |
|
me@260
|
18 - `code3`: three-letter code |
|
me@260
|
19 - `is_modified`: either of True or False |
|
me@260
|
20 |
|
me@260
|
21 class attributes: |
|
me@260
|
22 |
|
me@260
|
23 - `by_code1`: a mapping from one-letter code to MonomerType object |
|
me@260
|
24 - `by_code3`: a mapping from three-letter code to MonomerType object |
|
me@260
|
25 - `by_name`: a mapping from monomer name to MonomerType object |
|
me@260
|
26 - `instance_type`: class of Monomer objects to use when creating new |
|
me@260
|
27 objects; this must be redefined in descendent classes |
|
me@260
|
28 |
|
me@260
|
29 All of the class attributes MUST be redefined when subclassing. |
|
me@260
|
30 """ |
|
me@260
|
31 |
|
me@260
|
32 by_code1 = {} |
|
me@260
|
33 by_code3 = {} |
|
me@260
|
34 by_name = {} |
|
me@260
|
35 instance_type = None |
|
me@260
|
36 |
|
me@260
|
def __init__(self, name="", code1="", code3="", is_modified=False):
    """Create a monomer type and register it in the lookup tables.

    - `name`: full name of the monomer type; stored capitalized
    - `code1`: one-letter code; stored upper-cased
    - `code3`: three-letter code; stored upper-cased
    - `is_modified`: truthy for modified monomers; such types are not
      registered in `by_code1`

    The new object is added to the `by_code1` (unmodified types only),
    `by_code3` and `by_name` mappings reached through `self`, and also
    to `MonomerType.by_code3` and `MonomerType.by_name`.
    """
    super(MonomerType, self).__init__()
    self.name = name.capitalize()
    self.code1 = code1.upper()
    self.code3 = code3.upper()
    self.is_modified = bool(is_modified)
    if not is_modified:
        self.by_code1[self.code1] = self
    # Key by the normalized (upper-cased) code: from_code3() upper-cases
    # its argument before the lookup, so a raw lower- or mixed-case key
    # could never be found.
    self.by_code3[self.code3] = self
    # NOTE(review): by_name is still keyed by the raw `name` rather than
    # self.name; confirm how from_name() looks names up before
    # normalizing this key as well.
    self.by_name[name] = self
    # We duplicate distinguished long names into MonomerType itself,
    # so that we can use MonomerType.from_code3 to create the relevant
    # type of monomer.
    MonomerType.by_code3[self.code3] = self
    MonomerType.by_name[name] = self
|
me@260
|
52 |
|
me@260
|
@classmethod
def _initialize(cls, type_letter, codes=data.codes.codes):
    """Instantiate `cls` for every row of `codes` of the requested type.

    `type_letter` selects rows by their first field:

    - 'p' for protein
    - 'd' for DNA
    - 'r' for RNA

    `codes` is an iterable of five-item rows, in the order
    (type, code1, is_modified, code3, name).

    The created objects are not returned; nothing is done with them
    here beyond calling the constructor.
    """
    for row in codes:
        kind, code1, is_modified, code3, name = row
        if kind != type_letter:
            continue
        cls(name, code1, code3, is_modified)
|
me@260
|
68 |
|
me@260
|
@classmethod
def from_code1(cls, code1):
    """Look up a monomer type in `cls.by_code1` by one-letter code.

    The code is upper-cased before the lookup. Raises KeyError when
    no type is registered under it.
    """
    key = code1.upper()
    return cls.by_code1[key]
|
me@260
|
73 |
|
me@260
|
@classmethod
def from_code3(cls, code3):
    """Look up a monomer type in `cls.by_code3` by three-letter code.

    The code is upper-cased before the lookup. Raises KeyError when
    no type is registered under the resulting key.
    """
    key = code3.upper()
    return cls.by_code3[key]
|
me@260
|
78 |
|
me@260
|
79 @classmethod |
|
me@260
|
80 def from_name(cls, name): |
|
me@260
|
81 """Return monomer type by |