/* ARIADNE V.1.3 Copyright Richard Mott 2000 Wellcome Trust Centre For Human Genetics Univeristy of Oxford Roosevelt Drive Oxford OX3 7AD UK The software package ARIADNE is distributed in the hope that it will be useful, but in order that the University as a charitable foundation protects its assets for the benefit of its educational and research purposes, the University makes clear that no condition is made or to be implied, nor is any warranty given or to be implied, as to the accuracy of ARIADNE, or that it will be suitable for any particular purpose or for use under any specific conditions, or that the content or use of ARIADNE will not constitute or result in infringement of third-party rights. Furthermore, the University disclaims all responsibility for the use which is made of ARIADNE. */ #include #include #include"seq_util.h" #include"ariadne.h" #include"gapstat.h" /* ***************************************************************************** */ int TopAlignProteinToProfile( SEQUENCE *protein, PROFILE *pro, int gap_start, int gap_extend, int top, double ethresh, int self, double lambda0, double K0, double entropy, double alpha ) { /* ***************************************************************************** Align a protein sequence to a PSI-BLAST profile, using a modified Smith-Waterman local alignment, with an affine gap penalty . protein is a SEQUENCE* containing the protein seq pro is a PROFILE* containing the psi-blast profile gap_start, gap_extend are gap penalties for inserting a gap in the profile or sequence */ int **S; int **H; int **V; int islands=0; ISLAND *island; ISLAND ***island_ptr; unsigned int **path; int plen = pro->length+1; int len = protein->len; int len2 = len+1; int i, j; int max=-1, I=0, J=0; char *seq = protein->s; int hits = 0; /* make sure all penalties have the correct sign (negative) */ gap_start = gap_start > 0 ? -gap_start : gap_start; gap_extend = gap_extend > 0 ? -gap_extend : gap_extend; gap_start += gap_extend; /* allocate the memory */ S = (int**)calloc( len2, sizeof(int*))+1; H = (int**)calloc( len2, sizeof(int*))+1; V = (int**)calloc( len2, sizeof(int*))+1; for(i=-1;ilength; for(j=0;j h2 ) { if ( h1 < 0 ) h = 0; else { h = h1; path[i-1][j] |= HORIZ << 8; } } else { if ( h2 < 0 ) h = 0; else { h = h2; path[i-1][j] |= DIAG << 8; } } H[i][j] = h; /* vertical (insertion in profile) */ v1 = V[i][j-1] + gap_extend; v2 = S[i][j-1] + gap_start; if ( v1 > v2 ) { if ( v1 < 0 ) v = 0; else { v = v1; path[i][j-1] |= VERT << 16; } } else { if ( v2 < 0 ) v = 0; else { v = v2; path[i][j-1] |= DIAG << 16; } } V[i][j] = v; if ( v > h ) { s = v; p = VERT; } else { s = h; p = HORIZ; } /* diagonal (match) */ m = pro->profile[j][(int)seq[i]]; d = S[i-1][j-1] + m; if ( d >= s ) { s = d; p = DIAG; } /* update the path (note defaults are 0 becuase calloc was used to initialise */ /* printf( "i %d j %d s %d m %d c %c\n", i, j, s, m, seq[i] ); */ if ( s > 0 ) { S[i][j] = s; path[i][j] |= p; if ( p == DIAG ) island_ptr[i][j] = island_ptr[i-1][j-1] ? island_ptr[i-1][j-1] : &island[islands++]; else if ( p == VERT ) island_ptr[i][j] = island_ptr[i][j-1] ? island_ptr[i][j-1] : &island[islands++]; else island_ptr[i][j] = island_ptr[i-1][j] ? island_ptr[i-1][j] : &island[islands++]; if ( island_ptr[i][j]->score < s ) { island_ptr[i][j]->score = s; island_ptr[i][j]->i = i; island_ptr[i][j]->j = j; } } } } /* backtrack */ qsort( island, islands, sizeof(ISLAND), IslandCmp ) ; printf( "\n"); hits = 0; while(1) { int length; PROFILE_ALIGNMENT *align; double percent, theta, logkappa, pval; pval = EmpiricalGEM( lambda0, K0, entropy, alpha, protein->len, pro->length, &theta, &logkappa, island[hits].score ); if ( ethresh > 0 && pval >= ethresh ) break; if ( ethresh <= 0 && hits >= top-1 ) break; align = ProteinBacktrack( path, island[hits].i, island[hits].j, &length ); percent = PercentIdentity( align, length, protein, pro ); PrintProteinProfileAlignment( stdout, hits+1, pro, protein, align, length, island[hits].score , pval, percent, K0*exp(logkappa), lambda0*theta, entropy, alpha, 1 ); free(align); hits++; } /* free the arrays */ for(i=-1;iscore - A->score; }