/* K=7 r=1/2 Viterbi decoder with optional Intel SIMD * Dec 2001, Phil Karn, KA9Q */ #include #include #include #include "viterbi27.h" #include "parity.h" static int V27_init; int cpu_features(void); #if defined(SSE2) char id_viterbi27[] = "k=7 r=1/2 Viterbi decoder, SSE2 version"; #elif defined(SSE) char id_viterbi27[] = "k=7 r=1/2 Viterbi decoder, SSE version"; #elif defined(MMX) char id_viterbi27[] = "k=7 r=1/2 Viterbi decoder, MMX version"; #else char id_viterbi27[] = "k=7 r=1/2 Viterbi decoder, portable C version"; #endif #if defined(MMX) typedef union { long long p; char c[64]; } decision_t; #define EXTRACT_DECISION(d,state) ((d)->c[(state)] & 1) /* Branch metric lookup tables, indexed by state and input symbol Mettab27_1 is for the first symbol, Mettab27_2 for the second The first index is the input symbol (0-15) The second index is the state number */ unsigned char Mettab27_1[16][32] __attribute__((aligned(32))); unsigned char Mettab27_2[16][32] __attribute__((aligned(32))); #else typedef union { long long p; unsigned long w[2]; } decision_t; #define EXTRACT_DECISION(d,state) (((d)->w[(state)/32] >> ((state)%32)) & 1) unsigned char Branchtab27_1[32] __attribute__ ((aligned(32))); unsigned char Branchtab27_2[32] __attribute__ ((aligned(32))); #endif /* State info for instance of Viterbi decoder * Don't change this without also changing references in [mmx|sse|sse2]bfly29.s! */ struct v27 { unsigned char metrics1[64]; /* path metric buffer 1 */ unsigned char metrics2[64]; /* path metric buffer 2 */ decision_t *dp; /* Pointer to decision output for current bit */ unsigned char *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ decision_t *decisions; /* Beginning of decisions for block */ void *alloc_blk; /* Return value from malloc */ }; /* Create a new instance of a Viterbi decoder */ void *create_viterbi27(int len){ void *blk; struct v27 *vp; int state; if(!V27_init){ /* Check that CPU has required features */ #if defined(SSE2) if(!(cpu_features() & (1 << 26))){ fprintf(stderr,"viterbi27: CPU does not support SSE2 instructions\n"); exit(1); } #elif defined(SSE) if(!(cpu_features() & (1 << 25))){ fprintf(stderr,"viterbi27: CPU does not support SSE instructions\n"); exit(1); } #elif defined(MMX) if(!(cpu_features() & (1 << 23))){ fprintf(stderr,"viterbi27: CPU does not support MMX instructions\n"); exit(1); } #endif /* Initialize metric tables */ for(state=0;state < 32;state++){ #ifdef MMX int symbol; for(symbol = 0;symbol < 16;symbol++){ Mettab27_1[symbol][state] = parity((2*state) & V27POLYA) ? (15-symbol):symbol; Mettab27_2[symbol][state] = parity((2*state) & V27POLYB) ? (15-symbol):symbol; } #else Branchtab27_1[state] = parity((2*state) & V27POLYA) ? 15:0; Branchtab27_2[state] = parity((2*state) & V27POLYB) ? 15:0; #endif } V27_init = 1; } /* Malloc only guarantees 8-byte alignment, but we want to ensure that * the path metric arrays are on 32-byte boundaries. At least 16-byte * alignment is mandatory in the SSE2 version, but the Pentium III * cache line size is 32 bytes */ blk = malloc(sizeof(struct v27)+32); if((int)blk & 31){ /* Not on 32-byte boundary; shift up */ vp = (struct v27 *)(((int)blk + 32) & ~31); } else { vp = (struct v27 *)blk; } vp->alloc_blk = blk; /* Record original pointer from malloc for use by free() */ /* The decisions only need be 32-bit aligned */ #if defined(MMX) vp->dp = vp->decisions = malloc((len+6)*64); #else vp->dp = vp->decisions = malloc((len+6)*8); #endif vp->old_metrics = vp->metrics1; vp->new_metrics = vp->metrics2; return vp; } /* Initialize Viterbi decoder for start of new frame */ int init_viterbi27(void *p,int starting_state){ struct v27 *vp = p; memset(vp->metrics1,60,64); vp->old_metrics = vp->metrics1; vp->new_metrics = vp->metrics2; vp->dp = vp->decisions; vp->old_metrics[starting_state & 63] = 0; /* Bias known start state */ return 0; } /* Do Viterbi chainback */ int chainback_viterbi27( void *p, unsigned char *data, /* Decoded output data */ unsigned int nbits, /* Number of data bits */ unsigned int endstate){ /* Terminal encoder state */ struct v27 *vp = p; int k; decision_t *decisions = (decision_t *)(vp->decisions); /* Make room beyond the end of the encoder register so we can * accumulate a full byte of decoded data */ endstate %= 64; endstate <<= 2; decisions += 6; /* Look past tail */ while(nbits-- != 0){ k = EXTRACT_DECISION(&decisions[nbits],endstate >> 2); /* The store into data[] only needs to be done every 8 bits. * But this avoids a conditional branch, and the writes will * combine in the cache anyway */ data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); } return 0; } /* Delete instance of a Viterbi decoder */ void delete_viterbi27(void *p){ struct v27 *vp = p; if(vp != NULL){ free(vp->decisions); free(vp->alloc_blk); } } #if !defined(MMX) && !defined(SSE) & !defined(SSE2) /* Portable C version */ /* C-language butterfly */ #define BFLY(i) {\ unsigned char metric,m0,m1,decision;\ metric = ((Branchtab27_1[i] ^ sym1) + (Branchtab27_2[i] ^ sym2) + 1)/2;\ m0 = vp->old_metrics[i] + metric;\ m1 = vp->old_metrics[i+32] + (15 - metric);\ decision = (m0-m1) >= 0;\ vp->new_metrics[2*i] = decision ? m1 : m0;\ vp->dp->w[i/16] |= decision << ((2*i)&31);\ m0 -= (metric+metric-15);\ m1 += (metric+metric-15);\ decision = (m0-m1) >= 0;\ vp->new_metrics[2*i+1] = decision ? m1 : m0;\ vp->dp->w[i/16] |= decision << ((2*i+1)&31);\ } int update_viterbi27(void *p,unsigned char sym1,unsigned char sym2){ struct v27 *vp = p; unsigned char *tmp; int normalize = 0; vp->dp->w[0] = vp->dp->w[1] = 0; BFLY(0); BFLY(1); BFLY(2); BFLY(3); BFLY(4); BFLY(5); BFLY(6); BFLY(7); BFLY(8); BFLY(9); BFLY(10); BFLY(11); BFLY(12); BFLY(13); BFLY(14); BFLY(15); BFLY(16); BFLY(17); BFLY(18); BFLY(19); BFLY(20); BFLY(21); BFLY(22); BFLY(23); BFLY(24); BFLY(25); BFLY(26); BFLY(27); BFLY(28); BFLY(29); BFLY(30); BFLY(31); /* Renormalize metrics */ if(vp->new_metrics[0] > 150){ int i; unsigned char minmetric = 255; for(i=0;i<64;i++) if(vp->new_metrics[i] < minmetric) minmetric = vp->new_metrics[i]; for(i=0;i<64;i++) vp->new_metrics[i] -= minmetric; normalize = minmetric; } vp->dp++; tmp = vp->old_metrics; vp->old_metrics = vp->new_metrics; vp->new_metrics = tmp; return normalize; } #endif void emms_viterbi27(void){ #if defined(MMX) || defined(SSE) asm("emms"); #endif } /* Dump current decoder state for debugging */ void debug_viterbi27(void *p){ struct v27 *vp = p; int i,j; fprintf(stderr,"viterbi27 @ bit %d:\n",vp->dp - vp->decisions); fprintf(stderr,"metrics:"); for(i=0;i<64;i++){ fprintf(stderr," %3d",vp->old_metrics[i]); } fprintf(stderr,"\n"); fprintf(stderr,"decisions: "); for(i=0;i<64;i++){ j = EXTRACT_DECISION(vp->dp-1,i); fprintf(stderr,"%d",j); } fprintf(stderr,"\n"); }