/* ==================================================================== * Copyright (c) 1999-2001 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * This work was supported in part by funding from the Defense Advanced * Research Projects Agency and the National Science Foundation of the * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * */ #include #include #include "cdcn.h" /************************************************************************* * * cdcn_update finds the vectors x, noise * and tilt that maximize the a posteriori probability. * only one iteration is performed. this routine can be recalled to * perform multiple iterations if cycles permit. * Coded by Alex Acero (acero@s), November 1989 * Modified by Uday Jain, June 95 * *************************************************************************/ static float initialize (float [][NUM_COEFF+1], int, float *, float *, float, float [][NUM_COEFF+1], float *, float [][NUM_COEFF+1], int); static void correction(float *, float *, float *, float *, int); static float max_q (float *, float *, float *, float *, float *, float *, int, float *, int); float cdcn_update (float *z, /* The observed cepstrum vectors */ int num_frames, /* Number of frames in utterance */ CDCN_type *cdcn_variables) { float distortion; float *noise, *tilt, *codebook, *prob, *variance, *corrbook; int num_codes; /* * If error, dont bother */ if (!cdcn_variables->run_cdcn) return(-1e+30); /* * Open suitcase */ noise = cdcn_variables->noise; tilt = cdcn_variables->tilt; codebook = cdcn_variables->means; prob = cdcn_variables->probs; variance = cdcn_variables->variance; corrbook = cdcn_variables->corrbook; num_codes = cdcn_variables->num_codes; /* * Initialize if this is the first time the routine is being called */ if (cdcn_variables->firstcall) { /* Get initial estimates for noise, tilt, x, y */ initialize (z,num_frames,noise,tilt,SPEECH_THRESHOLD,codebook, prob,variance,num_codes); correction (tilt, noise, codebook, corrbook, num_codes); cdcn_variables->firstcall = FALSE; } /* * Compute the correction terms for the means * Perform one iteration of the estimation of n and q */ distortion = max_q (variance, prob, noise, tilt, codebook, corrbook, num_codes, z, num_frames); correction (tilt, noise, codebook, corrbook, num_codes); return (distortion); } /************************************************************************* * * initialize finds an estimate of the noise vector as the average of all * frames whose power is below a threshold. It also computes the average * log-energy of the frames whose log-energy is above that threshold * (supposedly speech frames). * Coded by Alex Acero (acero@s), November 1989 * Modified by Uday Jain, June 95 * *************************************************************************/ static float initialize (float data[][NUM_COEFF+1], /* The observation cepstrum vectors */ int num_frames, /* Number of frames in utterance */ float *noise, /* Cepstrum vector for the noise */ float tilt[], float speech_threshold, /* Threshold for speech and noise */ float codebook[][NUM_COEFF+1], float *prob, float var[][NUM_COEFF+1], int ncodes) { float noise_ceiling, /* Threshold to separate speech and noise */ min, /* Minimum log-energy in utterance */ speech_power, /* Average speech power */ codemean[NUM_COEFF+1], localprob[256]; int i, /* Index all frames in utterance */ j, /* Index all coefficients within frame */ noise_frames, /* Number of noise frames */ speech_frames; /* Number of speech frames */ for (j = 1; j <= NUM_COEFF; j++) tilt[j] = 0.0; /* De-normalize prob w.r.t variance */ for (i=0;i