%{
#include "sequencedb.h"
#include "hscore.h"
#include "complexsequence.h"
#include "complexevalset.h"
#include "randommodel.h"
#include "randomdb.h"
#include "protein.h"

%}

struct ProteinDB
boolean is_single_seq !def="FALSE"
boolean is_random_db  !def="FALSE"
ComplexSequence * single
SequenceDB * sdb
ComplexSequenceEvalSet * cses
RandomProteinDB * rnd
boolean test_dna !def="FALSE"
%info
A database of proteins. This takes either
a single sequence or a sequence database
and allows a method expecting protein complexsequences
to loop over it.

It also provides generic database indexing for this

Horrible physical dependency in this generated by
the random protein db requiring histogram and randommodel
stuff. Yuk!
%%

api
object ProteinDB
des free_ProteinDB
endobject
func new_ProteinDB_from_single_seq
func single_fasta_ProteinDB
func new_ProteinDB
endapi

%{
#include "proteindb.h"

%func
shows the Hscore by the ProteinDB information
%arg
hs High Score structure
ofp output file
%%
void show_Hscore_ProteinDB(Hscore * hs,FILE * ofp)
{
  int i;

  /* one line per stored hit: query name, target name and score */
  for(i=0;i<hs->len;i++) {
    fprintf(ofp,"Query [%20s] Target [%20s] %d\n",
            hs->ds[i]->query->name,
            hs->ds[i]->target->name,
            hs->ds[i]->score);
  }
}

%func
Gets Protein sequence out from
the proteindb using the information stored in
dataentry
%simple get_entry
%arg
prodb r ProteinDB database
de r DataEntry information
%%
Protein * get_Protein_from_ProteinDB(ProteinDB * prodb,DataEntry * de)
{
  Sequence * seq;

  /* single-sequence database: hand back a Protein built on a hard link
     of the one held sequence; de is not consulted on this path */
  if( prodb->is_single_seq == TRUE ) {
    return Protein_from_Sequence(hard_link_Sequence(prodb->single->seq));
  }

  /* we need to get out the Sequence from seqdb */
  seq = get_Sequence_from_SequenceDB(prodb->sdb,de);

  if( seq == NULL ) {
    warn("Cannot get entry for %s from Protein db",de->name);
    return NULL;
  }

  seq->type = SEQUENCE_PROTEIN; /* force to protein */

  return Protein_from_Sequence(seq);
}

%func
adds information to dataentry from ProteinDB

This information is the necessary information for
the proteindb to find this sequence later
%%
boolean dataentry_add_ProteinDB(DataEntry * de,ComplexSequence * cs,ProteinDB * prodb)
{
  if( cs == NULL || cs->seq == NULL ) {
    warn("Adding a dataentry with a NULL complex sequence or null internal sequence. Nope!");
    return FALSE;
  }

  /* only a SequenceDB-backed database has indexing information to add */
  if( prodb->is_single_seq == FALSE ) {
    add_SequenceDB_info_DataEntry(prodb->sdb,de);
  }

  /* the entry gets its own copy of the sequence name */
  de->name = stringalloc(cs->seq->name);

  return TRUE;
}

%func
top level function which opens the protein database
%arg
prodb protein database
return_status w the status of the open from database.h
%%
ComplexSequence * init_ProteinDB(ProteinDB * prodb,int * return_status)
{
  ComplexSequence * cs;
  Sequence * seq;

  /* single-sequence database: the held ComplexSequence is the first
     (and only) entry */
  if( prodb->is_single_seq == TRUE ) {
    *return_status = DB_RETURN_OK;
    return prodb->single;
  }

  seq = init_SequenceDB(prodb->sdb,return_status);

  if( seq == NULL || *return_status == DB_RETURN_ERROR || *return_status == DB_RETURN_END ) {
    return NULL; /** error already reported **/
  }

  if( prodb->test_dna == FALSE ) {
    /* no type testing requested: force to protein */
    seq->type = SEQUENCE_PROTEIN;
  } else {
    if( seq->type != SEQUENCE_PROTEIN ) {
      /* the %s needs the sequence name; it was previously missing */
      warn("For sequence %s, looks like a DNA sequence. Failing",seq->name);
      *return_status = DB_RETURN_ERROR;
      /* NOTE(review): a ComplexSequence is still built and returned
         below even though the status is now DB_RETURN_ERROR - confirm
         that callers release it */
    }
  }

  cs = new_ComplexSequence(seq,prodb->cses);
  free_Sequence(seq);

  return cs;
}

%func
function which reloads the database
%arg
last previous complex sequence, will be freed
return_status w return_status of the load
%%
ComplexSequence * reload_ProteinDB(ComplexSequence * last,ProteinDB * prodb,int * return_status)
{
  ComplexSequence * cs;
  Sequence * seq;

  /* single-sequence database: nothing follows the first entry, and
     last is not freed on this path */
  if( prodb->is_single_seq == TRUE ) {
    *return_status = DB_RETURN_END;
    return NULL;
  }

  /** free Complex Sequence **/
  if( last != NULL ) {
    free_ComplexSequence(last);
  }

  seq = reload_SequenceDB(NULL,prodb->sdb,return_status);

  if( seq == NULL || *return_status == DB_RETURN_ERROR || *return_status == DB_RETURN_END ) {
    return NULL; /** error already reported **/
  }

  if( prodb->test_dna == FALSE ) {
    /* no type testing requested: force to protein */
    seq->type = SEQUENCE_PROTEIN;
  } else {
    if( seq->type != SEQUENCE_PROTEIN ) {
      /* the %s needs the sequence name; it was previously missing */
      warn("For sequence %s, looks like a DNA sequence. Failing",seq->name);
      *return_status = DB_RETURN_ERROR;
      /* NOTE(review): a ComplexSequence is still built and returned
         below even though the status is now DB_RETURN_ERROR - confirm
         that callers release it */
    }
  }

  cs = new_ComplexSequence(seq,prodb->cses);
  free_Sequence(seq);

  return cs;
}

%func
top level function which closes the protein database
%arg
cs last complex sequence
prodb protein database
%%
boolean close_ProteinDB(ComplexSequence * cs,ProteinDB * prodb)
{
  /* single-sequence database: nothing to close, and cs is not freed
     on this path */
  if( prodb->is_single_seq == TRUE ) {
    return TRUE;
  }

  if( cs != NULL ) {
    free_ComplexSequence(cs);
  }

  return close_SequenceDB(NULL,prodb->sdb);
}

%func
To make a new protein database
from a single Sequence with default amino acid mapping
%arg
seq sequence which is placed into ProteinDB structure.
%%
ProteinDB * new_ProteinDB_from_single_seq(Sequence * seq)
{
  ComplexSequenceEvalSet * cses;
  ComplexSequence * cs;

  /* the evaluation set is only needed to build the ComplexSequence,
     so it is released straight afterwards */
  cses = default_aminoacid_ComplexSequenceEvalSet();
  cs = new_ComplexSequence(seq,cses);
  free_ComplexSequenceEvalSet(cses);

  return new_ProteinDB_from_single_cseq(cs);
}

%func
To make a new protein database
from a single ComplexSequence
%arg
cs complex sequence which is held.
%%
ProteinDB * new_ProteinDB_from_single_cseq(ComplexSequence * cs)
{
  ProteinDB * out;

  out = ProteinDB_alloc();
  out->is_single_seq = TRUE;
  out->single = cs; /* stored as-is, no hard link taken */

  return out;
}

%func
pre-packed single fasta protein database
%arg
filename name of fasta file
%%
ProteinDB * single_fasta_ProteinDB(char * filename)
{
  SequenceDB * sdb;
  ComplexSequenceEvalSet * cses;
  ProteinDB * out;

  sdb  = single_fasta_SequenceDB(filename);
  cses = default_aminoacid_ComplexSequenceEvalSet();

  out = new_ProteinDB(sdb,cses);

  /* new_ProteinDB only keeps cses when it succeeds; on failure (for
     example a NULL sdb) nothing else refers to the evaluation set made
     here, so release it rather than leak it.
     NOTE(review): a non-NULL sdb is not released on failure, as no
     SequenceDB destructor is visible in this file */
  if( out == NULL && cses != NULL ) {
    free_ComplexSequenceEvalSet(cses);
  }

  return out;
}

%func
To make a new protein database
%arg
seqdb sequence database
cses protein evaluation set
%%
ProteinDB * new_ProteinDB(SequenceDB * seqdb,ComplexSequenceEvalSet * cses)
{
  ProteinDB * out;

  /** should check sequence database **/
  if( seqdb == NULL ) {
    warn("Cannot make ProteinDB from NULL SequenceDB object");
    return NULL;
  }

  /* cses is dereferenced just below, so reject NULL first */
  if( cses == NULL ) {
    warn("Cannot make ProteinDB from NULL ComplexSequenceEvalSet object");
    return NULL;
  }

  if( cses->type != SEQUENCE_PROTEIN ) {
    warn("You can't make a protein database with a non SEQUENCE_PROTEIN cses type [%d]",cses->type);
    return NULL;
  }

  /* both pointers are stored directly; no hard links are taken */
  out = ProteinDB_alloc();
  out->sdb = seqdb;
  out->cses = cses;

  return out;
}

%}