%{ #include "wisebase.h" #include "histogram.h" #define HscoreLISTLENGTH 256 #define DATAENTRYSTDPOINTS 8 #define DATASCORESTORAGE_LENGTH 1024 typedef long BytePosition; %} struct DataEntry char * name // name of the entry int data[DATAENTRYSTDPOINTS] // space for algorithms to use boolean is_reversed !def="FALSE" // for sequences. handy BytePosition byte_position !hidden // useful for indexers - hopefully long enough! char * filename !hidden !link // useful for indexers etc. %info A lightweight structure to represent the information a db search algorithm will want to store and *nothing* more about a single entry This object will be stored twice (once for the target and once for the query) for each comparison: they probably will be on different databases and different objects. The data field is just a number (8 at the moment) of int's available for databases to store useful information (eg, length) of the object. A number of extra fields are provided for convience sake for indexers, including byte_position and filename. %% struct DataScore DataEntry * query DataEntry * target int score double evalue int is_stored !hidden %info The basic one entry vs one entry structure. Each of the DataEntry datastructures actually store the information about the indexing etc. %% struct DataScoreStorage DataScore score_array[DATASCORESTORAGE_LENGTH] DataEntry query_array[DATASCORESTORAGE_LENGTH] DataEntry target_array[DATASCORESTORAGE_LENGTH] int curr_pos !def="0" %info This object was needed because for very large searches the memory usage of allocation DataScore and DataEntry's on the heap was becoming prohibative. This datastructure holds pre-allocated DataScore and DataEntry objects read for use. A complicated constructor/deconstructor pair for DataScore objects ensures that these can be used cleanly. %% struct Hscore DataScore ** ds !list !hidden DataScoreStorage ** store !list !len="st_" !hidden Histogram * his double score_level // passed into should_store function boolean (*should_store)(int given_score,double internal_score_level) !func float (*score_to_his)(int given_score) !func int report_level // number of sequences to report on long total // total number of scores (duplicated info in histogram) %info Holds the information about a db search. Meant to be very lightweight The histogram is carried for on-the-fly histogram storage outside of the database. The idea is that the function should_store will tell whether the datascore structure should be stored (if it is NULL, it is always stored). The score_to_his function maps the score in datascore to the float in Histogram, allowing the scoring system of the search method to be on a different basis to the scoring system of the histogram. For most times, this is going to be Score2Bits To prevent too much dependency, the 'standard' way of making a histogram that has a bits cut off level is done using functions in the dynlibcross module (cross code), as it needs both Hscore and Probability. You should read dynlibcross module for the constructors for Hscore %% api object Hscore des free_Hscore func minimum_score_Hscore func maximum_score_Hscore func sort_Hscore_by_score func length_datascore_Hscore func get_datascore_Hscore func get_score_Hscore func get_evalue_Hscore func basic_show_Hscore endobject object DataScore des free_DataScore endobject object DataEntry des free_DataEntry endobject func std_score_Hscore endapi %{ #include "hscore.h" %func This gives you a standard Hscore module with a cutoff in score %% Hscore * std_score_Hscore(int cut_off,int report_stagger) { Hscore * out; out = Hscore_alloc_std(); out->his = new_Histogram(-1000,1000,100); out->score_level = cut_off; out->should_store = raw_should_store_Hscore; out->score_to_his = raw_score_to_his; out->report_level = report_stagger; return out; } %func This function is for the Hscore std constructor, %type internal %% boolean raw_should_store_Hscore(int score,double cutoff) { if( score > cutoff ) { return TRUE; } return FALSE; } %func This function is for the Hscore std constructor, %type internal %% float raw_score_to_his(int score) { return score; } %func Tells whether this score should be stored or not. Also updates Histogram if needed %% boolean should_store_Hscore(Hscore * hs,int score) { hs->total++; if( hs->report_level != -1 && (hs->total % hs->report_level == 0)) { if( hs->len > 0) { info("Done %d comparisons: last stored comparison was %s to %s",hs->total,hs->ds[hs->len-1]->query->name,hs->ds[hs->len-1]->target->name); } else { info("Done %d comparisons: No stored comparisons",hs->total); } } if( hs->his != NULL && hs->score_to_his != NULL ) { AddToHistogram(hs->his,(*hs->score_to_his)(score)); } if( hs->should_store == NULL ) { return TRUE; } return (*hs->should_store)(score,hs->score_level); } %func Returns the number of datascores in the hscore structure %simple length %arg obj r Hscore object %% int length_datascore_Hscore(Hscore * obj) { return obj->len; } %func Returns the specific datascore held at this position. This requires a considerable amount of memory duplication, so please dont process all your results by looping through this. %simple datascore %arg hs r Hscore object i position to be read return o New datascore object %% DataScore * get_datascore_Hscore(Hscore * hs,int i) { DataScore * out; out = new_DataScore(); copy_DataEntry(hs->ds[i]->query,out->query); copy_DataEntry(hs->ds[i]->target,out->target); out->score = hs->ds[i]->score; out->evalue = hs->ds[i]->evalue; return out; } %func Copies the info from one DataEntry to another %type internal %% void copy_DataEntry(DataEntry * from,DataEntry * to) { int i; to->name = stringalloc(from->name); for(i=0;idata[i] = from->data[i]; to->is_reversed = from->is_reversed; to->byte_position = from->byte_position; to->filename = from->filename; /* linked! */ } %func Returns the score of the specific datascore held at this position. %simple score %arg hs r Hscore object i position to be read return score %% int get_score_Hscore(Hscore * hs,int i) { return hs->ds[i]->score; } %func Returns the evalue of the specific datascore held at this position. %simple evalue %arg hs r Hscore object i position to be read return evalue %% double get_evalue_Hscore(Hscore * hs,int i) { return hs->ds[i]->evalue; } %func If a histogram is present, tries to fit the histogram and then gives evalues to all the scores in the Hscore model %% boolean fit_Hscore_to_EVD(Hscore * hs,float guess_of_outliers) { int i; if( hs->his == NULL ) { warn("Your Hscore has no histogram structure, and so no EVD can be fitted"); return FALSE; } if( ExtremeValueFitHistogram(hs->his,TRUE,guess_of_outliers) == 0 ) { warn("Extreme Value distribution is unable to be fitted. Sorry!"); return FALSE; } for(i=0;ilen;i++) { hs->ds[i]->evalue = ExtremeValueE((*hs->score_to_his)(hs->ds[i]->score),hs->his->param[EVD_MU],hs->his->param[EVD_LAMBDA],hs->his->total); } return TRUE; } %func gets the minimum score from Hscore %% int minimum_score_Hscore(Hscore * hs) { int i; int min; if( hs->len == 0) { warn("Can't get a minimum score with no entries"); return 0; } for(i=1,min=hs->ds[0]->score;ilen;i++) { if( min > hs->ds[i]->score ) { min = hs->ds[i]->score; } } return min; } %func gets the maximum score from Hscore %% int maximum_score_Hscore(Hscore * hs) { int i; int max; if( hs->len == 0) { warn("Can't get a minimum score with no entries"); return 0; } for(i=1,max=hs->ds[0]->score;ilen;i++) { if( max < hs->ds[i]->score ) { max = hs->ds[i]->score; } } return max; } %func The most baby-talk showing of Hscore %simple show %arg %% void basic_show_Hscore(Hscore * hs,FILE * ofp) { int i; if( hs == NULL ) { warn("parsing in a NULL Hscore object - cannot show!"); fprintf(ofp,"parsing in a NULL Hscore object - cannot show!"); } for(i=0;ilen;i++) { fprintf(ofp,"%3d Query: %12s Target: %12s Score %d\n",i, hs->ds[i]->query->name == NULL ? "NoName" : hs->ds[i]->query->name, hs->ds[i]->target->name == NULL ? "NoName" : hs->ds[i]->target->name, hs->ds[i]->score); } } %func As it says, sorts the high score by its score %arg hs Hscore to be sorted %% void sort_Hscore_by_score(Hscore * hs) { sort_Hscore(hs,compare_DataScore_by_score); } %func Used to compare two datascores for /sort_Hscore_by_score %type internal %arg %% int compare_DataScore_by_score(DataScore * one,DataScore * two) { if( one->score == two->score ) { if( one->query != NULL && one->query->name != NULL && two->query != NULL && two->query->name != NULL ) return strcmp(one->query->name,two->query->name); else return 1; } return two->score - one->score; } %func The best way to make a new DataScore. Allocates the query and target DataEntry structures as well as the DataScore structure. %type internal %arg %% DataScore * new_DataScore(void) { DataScore * ds; ds = DataScore_alloc(); ds->query = DataEntry_alloc(); ds->target = DataEntry_alloc(); return ds; } %func Gets a new DataScore from Storage %type internal %% DataScore * new_DataScore_from_storage(Hscore * hs) { DataScoreStorage * new; DataScoreStorage * curr; if( hs->st_len == 0 ) { new = new_DataScoreStorage(); if( new == NULL ) { warn("could not make inital data score storage!"); return NULL; } add_st_Hscore(hs,new); curr = new; } else { curr = hs->store[hs->st_len-1]; if( curr->curr_pos == DATASCORESTORAGE_LENGTH ) { new = new_DataScoreStorage(); if( new == NULL ) { warn("could not make data score storage block %d!",hs->st_len-1); return NULL; } add_st_Hscore(hs,new); curr = new; } } return &curr->score_array[curr->curr_pos++]; } %func Correctly handles destruction of a datascore %% !deconstructor DataScore * free_DataScore(DataScore * obj) { if( obj->is_stored == 1 ) { return NULL; /* don't free! */ } if( obj->dynamite_hard_link > 1 ) { obj->dynamite_hard_link--; return NULL; } if( obj->query != NULL ) free_DataEntry(obj->query); if( obj->target != NULL ) free_DataEntry(obj->target); return NULL; } %func Correctly handles destruction of DataScoreStorage, by freeing members in data storage %% !deconstructor DataScoreStorage * free_DataScoreStorage(DataScoreStorage * obj) { int i; for(i=0;icurr_pos;i++) { if( obj->query_array[i].name != NULL ) { ckfree(obj->query_array[i].name); } if( obj->target_array[i].name != NULL ) { ckfree(obj->target_array[i].name); } } ckfree(obj); return NULL; } %func Makes a new DataScoreStorage with all the pointers connected correctly %% DataScoreStorage * new_DataScoreStorage(void) { DataScoreStorage * out; int i; out = DataScoreStorage_alloc(); if( out == NULL ) { warn("Unable to make a new DataScoreStorage block with blocksize %d",DATASCORESTORAGE_LENGTH); return NULL; } for(i=0;iscore_array[i].query = &out->query_array[i]; out->score_array[i].target = &out->target_array[i]; out->score_array[i].is_stored = 1; } return out; } %}