allpy
changeset 1123:3ce9ab9a5d29
add util pfamdomains2fasta
pfamdomains2fasta converts an input XML file with Pfam domains
into FASTA files (the input of pair-cores), one FASTA file per domain.
author | Boris Nagaev <bnagaev@gmail.com> |
---|---|
date | Fri, 21 Dec 2012 15:08:34 +0400 |
parents | 5f9c14e679c1 |
children | 6ab975d2c4bf |
files | utils/pfamdomains2fasta.py |
diffstat | 1 files changed, 25 insertions(+), 0 deletions(-) [+] |
line diff
#!/usr/bin/python
"""Convert an XML list of Pfam domains into FASTA files.

The XML document is read from standard input.  Its root element must be
``LIST`` and every child of the root must be a ``PFAM`` element carrying a
``pfam_id`` attribute.  For each ``PFAM`` element a file named
``<pfam_id>.fasta`` is written; it holds one FASTA record per ``DOMAIN``
child, whose sequence is the concatenation of the ``res_name`` attributes
of the domain's ``RES`` children.
"""

import os
import sys

try:
    from xml.etree.cElementTree import parse, ElementTree, Element, SubElement
except ImportError:
    # cElementTree was removed in Python 3.9; the plain module is equivalent.
    from xml.etree.ElementTree import parse, ElementTree, Element, SubElement


def domain_to_fasta(domain):
    """Return the FASTA record (header line + sequence line) for a DOMAIN.

    The header has the form ``>file:<file>:<chain>:0:<s_pdb>:<e_pdb>``,
    where ``s_pdb`` and ``e_pdb`` are converted to integers.

    Raises:
        TypeError: if ``s_pdb``, ``e_pdb`` or a ``res_name`` is missing.
        ValueError: if ``s_pdb`` or ``e_pdb`` is not an integer literal.
    """
    pdb_file = domain.get('file')
    chain = domain.get('chain')
    s_pdb = int(domain.get('s_pdb'))
    e_pdb = int(domain.get('e_pdb'))
    sequence = ''.join([res.get('res_name') for res in domain.findall('RES')])
    header = '>file:%s:%s:0:%i:%i' % (pdb_file, chain, s_pdb, e_pdb)
    return '%s\n%s\n' % (header, sequence)


def convert(stream, out_dir=os.curdir):
    """Parse the XML in *stream* and write one FASTA file per PFAM element.

    Args:
        stream: file-like object (or file name) accepted by ``parse``.
        out_dir: directory in which the ``<pfam_id>.fasta`` files are
            created; defaults to the current directory.

    Returns:
        List of paths of the files written, in document order.

    Raises:
        ValueError: if the root element is not ``LIST`` or one of its
            children is not ``PFAM``.
    """
    elist = parse(stream).getroot()
    # Explicit exceptions instead of assert: asserts vanish under ``-O``.
    if elist.tag != 'LIST':
        raise ValueError("root element must be 'LIST', got %r" % (elist.tag,))
    written = []
    for pfam in elist:
        if pfam.tag != 'PFAM':
            raise ValueError("expected 'PFAM' element, got %r" % (pfam.tag,))
        path = os.path.join(out_dir, pfam.get('pfam_id') + '.fasta')
        # The context manager closes (and flushes) each file promptly.
        with open(path, 'w') as out:
            for domain in pfam.findall('DOMAIN'):
                out.write(domain_to_fasta(domain))
        written.append(path)
    return written


def main():
    """Read the XML from stdin and write FASTA files to the current directory."""
    convert(sys.stdin)


if __name__ == '__main__':
    main()