%{ #include "sequence.h" #include "codon.h" %} api func reverse_complement_Sequence func magic_trunc_Sequence func translate_Sequence endapi %{ #include "sequence_codon.h" %func This both complements and reverses a sequence, - a common wish! The start/end are correct with respect to the start/end of the sequence (ie start = end, end = start). %arg seq r Sequence to that is used to reverse (makes a new Sequence) return o new Sequence which is reversed %% Sequence * reverse_complement_Sequence(Sequence * seq) { Sequence * out; int i; int j; if( is_dna_Sequence(seq) != TRUE ) { warn("Cannot reverse complement non-DNA sequence... type is %s",Sequence_type_to_string(seq->type)); return NULL; } out = Sequence_from_static_memory(seq->name,seq->seq); for(j=0,i=seq->len-1;i >= 0;i--,j++) { out->seq[j] = char_complement_base(seq->seq[i]); /*fprintf(stderr,"In position %d placed %c from %c\n",j,out->seq[j],seq->seq[i]);*/ } out->len = strlen(seq->seq); out->offset = seq->end; out->end = seq->offset; out->type = seq->type; return out; } %func Clever function for dna sequences. When start < end, truncates normally when start > end, truncates end,start and then reverse complements. ie. If you have a coordinate system where reverse sequences are labelled in reverse start/end way, then this routine produces the correct sequence. %arg seq r sequence that is the source to be truncated start r start point end r end point return o new Sequence which is truncated/reversed %% Sequence * magic_trunc_Sequence(Sequence * seq,int start,int end) { Sequence * temp; Sequence * out; if( is_dna_Sequence(seq) == FALSE) { warn("Cannot magic truncate on a non DNA sequence... type is %s",Sequence_type_to_string(seq->type)); return NULL; } if( start < 0 || end < 0 ) { warn("Attempting a magic truncation on indices which are less than zero [%d:%d]. Clearly impossible",start,end); return NULL; } if( start < end ) { return trunc_Sequence(seq,start,end); } else { temp = trunc_Sequence(seq,end,start); if( temp == NULL ) { warn("Unable to truncate sequence"); return NULL; } out = reverse_complement_Sequence(temp); free_Sequence(temp); return out; } } %func This translates a DNA sequence to a protein. It assummes that it starts at first residue (use trunc_Sequence to chop a sequence up). %simple translate %arg dna r DNA sequence to be translated ct r Codon table to do codon->aa mapping return o new protein sequence %% Sequence * translate_Sequence(Sequence * dna,CodonTable * ct) { Sequence * out; int i; int j; int len; char * seq; char * name; char buffer[512]; if( is_dna_Sequence(dna) == FALSE) { warn("Trying to make a translation from a non DNA sequence... type is [%s]",Sequence_type_to_string(dna->type)); return NULL; } len = dna->len/3 + 1; seq = ckcalloc(len,sizeof(char)); sprintf(buffer,"%s.tr",dna->name == NULL ? "NoNameDNASeq" : dna->name); name = stringalloc(buffer); out = Sequence_from_dynamic_memory(name,seq); for(i=0,j=0;ilen-3;i+=3,j++) { out->seq[j] = aminoacid_from_seq(ct,dna->seq+i); } out->seq[j] = '\0'; out->type = SEQUENCE_PROTEIN; out->len = strlen(out->seq); return out; } %func This a) sees how many non ATGCN characters there are in Seq b) If the level is below fraction a) flips non ATGC chars to N b) writes number of conversions to number_of_conver c) returns TRUE c) else returns FALSE fraction of 0.0 means completely intolerant of errors fraction of 1.0 means completely tolerant of errors %simple force_to_dna %arg seq rw sequence object read and converted fraction r number 0..1 for tolerance of conversion number_of_conver wN number of conversions actually made return r TRUE for conversion to DNA, FALSE if not %% boolean force_to_dna_Sequence(Sequence * seq,double fraction,int * number_of_conver) { int count =0; int i; if( seq == NULL ) { warn("Attempting to force a sequence with no Sequence object!\n"); return FALSE; } if( seq->len <= 0 ) { warn("Trying to make a sequence with a length of %d. Bad news!",seq->len); return FALSE; } for(i=0;ilen;i++) { /* if it is lower case, uppercase it! */ seq->seq[i] = (char)toupper((int)seq->seq[i]); if( !is_valid_base_char(seq->seq[i]) ) { count++; } } if( ((double)count/(double)seq->len) < fraction ) { seq->type = SEQUENCE_DNA; if( count != 0 ) { for(i=0;ilen;i++) { if( !is_valid_base_char(seq->seq[i]) ) { seq->seq[i] = 'N'; } } } if( number_of_conver != NULL ) { *number_of_conver = count; } return TRUE; } else { if( number_of_conver != NULL ) { *number_of_conver = count; } return FALSE; } }