#!/usr/bin/env python
# Created: Wed Jun 21 15:53:22 2000
# Last changed: Time-stamp: <00/12/02 15:56:27 thomas>
# thomas@cbs.dtu.dk, http://www.cbs.dtu.dk/thomas
# File: xbb_translations.py
"""Text reports of DNA translations: single-frame and six-frame (GCFrame)."""

import string, re, regsub
import os, sys  # os.system, sys.argv
import time
sys.path.insert(0, '.')
from Tkinter import *

from Bio import Seq
from Bio import Alphabet
from Bio.Alphabet import IUPAC
from Bio import Translate
from Bio.Data import IUPACData


class xbb_translations:
    """Translate nucleotide strings and format the result as plain text.

    The methods count only upper-case 'G'/'C' (and 'A'/'T') characters, so
    callers should pass upper-case nucleotide sequences.
    """

    def __init__(self):
        """Create a translator; the instance holds no state."""

    def frame1(self, seq, translation_table=1):
        """Translate seq from its first nucleotide.

        seq is wrapped in a Seq with the IUPAC unambiguous DNA alphabet and
        translated with the translator registered under translation_table in
        Translate.unambiguous_dna_by_id.  Returns whatever that translator's
        translate() returns.
        """
        dna = Seq.Seq(seq, IUPAC.unambiguous_dna)
        translator = Translate.unambiguous_dna_by_id[translation_table]
        return translator.translate(dna)

    def complement(self, seq):
        """Return the complement of seq as a string (same orientation).

        Each character is looked up in IUPACData.ambiguous_dna_complement;
        a character missing from that table raises KeyError.
        """
        table = IUPACData.ambiguous_dna_complement
        return ''.join([table[nt] for nt in seq])

    def reverse(self, seq):
        """Return the characters of seq in reverse order as a string."""
        chars = list(seq)
        chars.reverse()
        return ''.join(chars)

    def antiparallel(self, seq):
        """Return the reverse complement of seq as a string."""
        return self.reverse(self.complement(seq))

    def frame(self, seq, frame, translation_table=1):
        """Translate seq in the requested reading frame.

        frame is 1, 2 or 3 for the given strand and -1, -2 or -3 for the
        reverse complement; any other value falls back to frame 1.
        Translation starts abs(frame) - 1 nucleotides into the chosen strand.
        """
        if not ((-3 <= frame <= -1) or (1 <= frame <= 3)):
            frame = 1
        if frame < 0:
            # Negative frames are read off the opposite strand, 5' to 3'.
            seq = self.antiparallel(seq)
        return self.frame1(seq[abs(frame) - 1:], translation_table)

    def header_nice(self, txt, seq):
        """Return the report header for seq, titled txt.

        The header holds the title, the current local time, the counts of
        upper-case A/T/G/C, an abbreviated lower-case copy of the sequence
        (first and last 10 characters when longer than 20), its length and
        its GC percentage, followed by blank lines.
        """
        length = len(seq)
        if length > 20:
            short = '%s ... %s' % (seq[:10], seq[-10:])
        else:
            short = seq

        date = time.strftime('%y %b %d, %X', time.localtime(time.time()))
        parts = ['%s: %s, ' % (txt, date)]
        for nt in ['a', 't', 'g', 'c']:
            parts.append('%s:%d ' % (nt, seq.count(nt.upper())))
        parts.append('\nSequence: %s, %d nt, %0.2f %%GC\n'
                     % (short.lower(), length, self.gc(seq)))
        parts.append('\n\n')
        return ''.join(parts)

    def frame_nice(self, seq, frame, translation_table=1):
        """Return a text report of seq translated in the given frame.

        The sequence is printed 60 nucleotides per line in lower case with
        the integer GC percentage of that line; above each line come its
        1-based nucleotide/residue position and up to 20 residues.
        """
        protein = self.frame(seq, frame, translation_table)
        # NOTE(review): the title text is fixed and does not mention `frame`.
        parts = [self.header_nice('Plus one frame translation', seq)]
        for i in range(0, len(seq), 60):
            subseq = seq[i:i + 60]
            p = i // 3  # index of the first residue shown for this line
            parts.append('%d/%d\n' % (i + 1, p + 1))
            parts.append(' '.join(list(protein[p:p + 20])) + '\n')
            # seq
            parts.append(subseq.lower() + '%5d %%\n' % int(self.gc(subseq)))
        return ''.join(parts)

    def gc(self, seq):
        """Return the percentage of upper-case G and C characters in seq.

        Returns 0.0 when there are none, which also covers an empty seq.
        """
        ngc = seq.count('G') + seq.count('C')
        if ngc == 0:
            return 0.0
        return (100.0 * ngc) / len(seq)

    def gcframe(self, seq, translation_table=1):
        """Return a six-frame translation report for seq.

        Each 60-nucleotide line of the sequence is shown with frames 3, 2
        and 1 above it and, below its complement, frames -2, -1 and -3
        (reversed so they read along the forward strand).
        """
        # always use uppercase nt-sequence !!
        comp = self.complement(seq)
        anti = self.reverse(comp)
        frames = {}
        for i in range(0, 3):
            frames[i + 1] = self.frame1(seq[i:], translation_table)
            frames[-(i + 1)] = self.reverse(
                self.frame1(anti[i:], translation_table))

        # NOTE(review): residues are joined with a single space, so 20
        # residues do not span the 60-nt line they annotate; confirm the
        # intended separator and indent widths.
        parts = [self.header_nice('GCFrame', seq)]
        for i in range(0, len(seq), 60):
            subseq = seq[i:i + 60]
            csubseq = comp[i:i + 60]
            p = i // 3  # index of the first residue shown for this line

            def row(key, p=p):
                # Up to 20 residues of one frame, space separated.
                return ' '.join(list(frames[key][p:p + 20]))

            # + frames
            parts.append('%d/%d\n' % (i + 1, p + 1))
            parts.append(' ' + row(3) + '\n')
            parts.append(' ' + row(2) + '\n')
            parts.append(row(1) + '\n')
            # seq
            parts.append(subseq.lower() + '%5d %%\n' % int(self.gc(subseq)))
            parts.append(csubseq.lower() + '\n')
            # - frames
            parts.append(row(-2) + ' \n')
            parts.append(' ' + row(-1) + '\n')
            parts.append(' ' + row(-3) + '\n\n')
        return ''.join(parts)


if __name__ == '__main__':
    # Shorter alternative for quick checks:
    # s = 'GCCCTTTCTTATTAGTGCTACCGCTAATAGGTAAATATGAAAAACCTTTG'
    s = 'ATTCCGGTTGATCCTGCCGGACCCGACCGCTATCGGGGTAGGGATAAGCCATGGGAGTCTTACACTCCCGGGTAAGGGAGTGTGGCGGACGGCTGAGTAACACGTGGCTAACCTACCCTCGGGACGGGGATAACCCCGGGAAACTGGGGATAATCCCCGATAGGGAAGGAGTCCTGGAATGGTTCCTTCCCTAAAGGGCTATAGGCTATTTCCCGTTTGTAGCCGCCCGAGGATGGGGCTACGGCCCATCAGGCTGTCGGTGGGGTAAAGGCCCACCGAACCTATAACGGGTAGGGGCCGTGGAAGCGGGAGCCTCCAGTTGGGCACTGAGACAAGGGCCCAGGCCCTACGGGGCGCACCAGGCGCGAAACGTCCCCAATGCGCGAAAGCGTGAGGGCGCTACCCCGAGTGCCTCCGCAAGGAGGCTTTTCCCCGCTCTAAAAAGGCGGGGGAATAAGCGGGGGGCAAGTCTGGTGTCAGCCGCCGCGGTAATACCAGCTCCGCGAGTGGTCGGGGTGATTACTGGGCCTAAAGCGCCTGTAGCCGGCCCACCAAGTCGCCCCTTAAAGTCCCCGGCTCAACCGGGGAACTGGGGGCGATACTGGTGGGCTAGGGGGCGGGAGAGGCGGGGGGTACTCCCGGAGTAGGGGCGAAATCCTTAGATACCGGGAGGACCACCAGTGGCGGAAGCGCCCCGCTA'

    test = xbb_translations()
    print('============================================================')
    print(test.gcframe(s))