#!/usr/local/bin/perl =head1 NAME estwise.pl =head1 SYNOPSIS estwise.pl =head1 DESCRIPTION compares a protein sequence to a DNA database, allowing for frame shift errors =head1 OPTIONS This perl script is just an example. For more options you should read the perl script and customise it for your own applications =cut use Wise2; use strict; # so we can use STDOUT as a bare word. no strict 'subs'; my $pro_file = shift; my $dna_file = shift; if( !defined $dna_file ) { system("perldoc $0"); exit(1); } # read in the protein file open(PRO,$pro_file) || die "Could not open $pro_file!"; my $seq = &Wise2::Sequence::read_fasta_Sequence(PRO); my $pro = &Wise2::Protein::Protein_from_Sequence($seq); # Convert the protein to a profile-HMM (called a threestatemodel in wise2 speak) my $comp = &Wise2::CompMat::read_Blast_file_CompMat("blosum62.bla"); my $rm = &Wise2::default_RandomModel(); my $tsm = &Wise2::ThreeStateModel::ThreeStateModel_from_half_bit_Sequence($pro,$comp,$rm,-12,-2); # make a profile-HMM database from the single profile-HMM my $tdb = &Wise2::ThreeStateDB::new_single_ThreeStateDB($tsm,$rm); # destroy tsm memory... $tsm = 0; # load up dna file for as a simple sequence db my $sdb = &Wise2::single_fasta_SequenceDB($dna_file); if( $sdb == 0 ) { die "Could not read in $dna_file!"; } # convert to a typed (cDNA) db my $cdb = &Wise2::new_cDNADB($sdb); # load parameters to the estwise search # this is the indel rate my $cp = &Wise2::flat_cDNAParser(0.001); # codon table my $ct = &Wise2::CodonTable::read_CodonTable_file("codon.table"); #this means we are not using any codon bias my $cm = &Wise2::flat_CodonMapper($ct); # this is the substitution error $cm->sprinkle_errors_over_CodonMapper(0.001); # random model needed if we are not using syn my $rmd = &Wise2::RandomModelDNA_std(); # means estwise3 algorithm my $alg = &Wise2::alg_estwrap_from_string("333"); my $dbsi = &Wise2::new_serial_DBSearchImpl(); #print STDERR "Launching ($tdb,$cdb,$cp,$cm,$rmd,1,$alg,0) \n"; my $hs = &Wise2::Hscore_from_TSM_estwise($tdb,$cdb,$cp,$cm,$rmd,1,$alg,0,0.9,-1,1,$dbsi); # simple basic show #$hs->show(STDOUT); $hs->sort_Hscore_by_score(); # # This is how to loop over hscore object. Notice # that we don't make a DataScore object until we really have # to - this is because the memory managment in hscore is # customised - meaning retrieval of a DataScore means an entire # allocation and copy of contents (usually it is just up'ing a reference count) # # Do not EVER loop over hs->datascore(...) putting everything into # an array (for a nice foreach call). It will kill your machine for # large databases. # my $len = $hs->length(); my($i,$ds,$alb,$cdna); for($i=0;$i<$len;$i++) { if( &Wise2::Score2Bits($hs->score($i)) < 25 ) { last; } $ds = $hs->datascore($i); print STDERR sprintf("$i %12s %12s [%s] %4.2f\n",$ds->query()->name(),$ds->target()->name,$ds->target->is_reversed == 1 ? "-" : "+",&Wise2::Score2Bits($ds->score())); } # # Get out the sequence of the first ones up to 5 # # Align them and report back # for($i=0;$i<$len && $i < 5;$i++) { if( &Wise2::Score2Bits($hs->score($i)) < 25 ) { last; } $ds = $hs->datascore($i); $cdna = $cdb->get_entry($ds->target); if( ! $cdna ) { warn("Can't get entry!"); next; } $alb= &Wise2::AlnBlock_from_Protein_estwise_wrap($pro,$cdna,$cp,$cm,$ct,$comp,-12,-2,0,$rmd,0,$rm,0.9,1); &Wise2::protcdna_ascii_display($alb,$pro->baseseq->seq,$pro->baseseq->name,$pro->baseseq->offset,$cdna,$ct,15,50,0,STDOUT); } #explicitly reap memory - helps bug tracking! $tsm = 0; $hs = 0; $ds = 0; #$tdb =0; #$seq =0; $pro = 0; $comp = 0; $rm = 0; $cp = 0; $ct = 0; print STDERR "Finished!\n";