Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/3ce9ab9a5d29
Дата изменения: Unknown
Дата индексирования: Sun Feb 3 00:21:55 2013
Кодировка:
allpy: 3ce9ab9a5d29

allpy

changeset 1123:3ce9ab9a5d29

add util pfamdomains2fasta. pfamdomains2fasta converts an input XML file with Pfam domains to fasta files (the input of pair-cores), one fasta file per domain.
author Boris Nagaev <bnagaev@gmail.com>
date Fri, 21 Dec 2012 15:08:34 +0400
parents 5f9c14e679c1
children 6ab975d2c4bf
files utils/pfamdomains2fasta.py
diffstat 1 files changed, 25 insertions(+), 0 deletions(-) [+]
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/utils/pfamdomains2fasta.py	Fri Dec 21 15:08:34 2012 +0400
     1.3 @@ -0,0 +1,25 @@
     1.4 +#!/usr/bin/python
     1.5 +
     1.6 +import sys
     1.7 +from xml.etree.cElementTree import parse, ElementTree, Element, SubElement
     1.8 +
     1.9 +xml = parse(sys.stdin)
    1.10 +elist = xml.getroot()
    1.11 +assert elist.tag == 'LIST'
    1.12 +for pfam in elist:
    1.13 +    assert pfam.tag == 'PFAM'
    1.14 +    pfam_id = pfam.get('pfam_id')
    1.15 +    o = open(pfam_id + ".fasta", 'w')
    1.16 +    for domain in pfam.findall('DOMAIN'):
    1.17 +        assert domain.tag == 'DOMAIN'
    1.18 +        file = domain.get('file')
    1.19 +        chain = domain.get('chain')
    1.20 +        s_pdb = int(domain.get('s_pdb'))
    1.21 +        e_pdb = int(domain.get('e_pdb'))
    1.22 +        sequence = ''
    1.23 +        for res in domain.findall('RES'):
    1.24 +            sequence += res.get('res_name')
    1.25 +        o.write('>file:%s:%s:0:%i:%i\n' % (file, chain, s_pdb, e_pdb))
    1.26 +        o.write(sequence)
    1.27 +        o.write('\n')
    1.28 +