/* ==================================================================== * Copyright (c) 1999-2001 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * This work was supported in part by funding from the Defense Advanced * Research Projects Agency and the National Science Foundation of the * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * */ /* KB.C - for compile_kb * * 27-May-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie-Mellon University * Included Bob Brennan's personaldic handling (similar to * oovdic). * * 09-Dec-94 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Cleaned up kb() interface; got rid of fwd-only, astar-mode * etc. * * Revision 8.6 94/10/11 12:33:49 rkm * Minor changes. * * Revision 8.5 94/07/29 11:52:10 rkm * Removed lmSetParameters() call; that is now part of lm_3g.c. * Added lm_init_oov() call to LM module. * Added ilm_init() call to ILM module. * * Revision 8.4 94/05/19 14:19:12 rkm * Commented out computePhraseLMProbs(). * * Revision 8.3 94/04/14 14:38:01 rkm * Added OOV words sub-dictionary. * * Revision 8.1 94/02/15 15:08:13 rkm * Derived from v7. Includes multiple start symbols for the LISTEN * project. Includes multiple LMs for grammar switching. * * Revision 6.15 94/02/11 13:15:18 rkm * Initial revision (going into v7). * Added multiple start symbols for the LISTEN project. * * Revision 6.14 94/01/31 16:35:17 rkm * Moved check for use of 8/16-bit senones on HPs to after pconf(). * * Revision 6.13 94/01/07 17:48:13 rkm * Added option to use trigrams in forward pass (simple implementation). * * Revision 6.12 94/01/05 16:04:20 rkm * *** empty log message *** * * Revision 6.11 94/01/05 16:02:17 rkm * Placed senone probs compression under conditional compilation. * * Revision 6.10 93/12/05 17:25:46 rkm * Added -8bsen option and necessary datastructures. * * Revision 6.9 93/12/04 16:24:11 rkm * Added check for use of -16bsen if compiled with _HPUX_SOURCE. * * Revision 6.8 93/11/15 12:20:39 rkm * Added -ilmusesdarpalm flag. * * Revision 6.7 93/11/03 12:42:35 rkm * Added -16bsen option to compress senone probs to 16 bits. * * Revision 6.6 93/10/27 17:29:45 rkm * *** empty log message *** * * Revision 6.5 93/10/15 14:59:13 rkm * Bug-fix in call to create_ilmwid_map. * * Revision 6.4 93/10/13 16:48:16 rkm * Added ilm_init call to Roni's ILM and whatever else is needed. * Added option to process A* only (in which case phoneme-level * files are not loaded). Added bigram_only argument to lm_read * call. * */ #include #include #include #include #include "s2types.h" #include "CM_macros.h" #include "basic_types.h" #include "search_const.h" #include "list.h" #include "hash.h" #include "err.h" #include "c.h" #include "pconf.h" #include "log.h" #include "dict.h" #include "msd.h" #include "smmap4f.h" #include "lm.h" #include "lmclass.h" #include "lm_3g.h" #include "hmm_tied_r.h" #include "scvq.h" #include "kb.h" #include "phone.h" #include "fbs.h" #define QUIT(x) {fprintf x; exit(-1);} static char *lm_file_name = 0; static char *lm_ctl_filename = 0; /* Multiple LM filenames and assoc. LM names */ static char *lmft_file_name = 0; static char *lm_tag_file_name = 0; static char *lm_word_tags_file_name = 0; static char const *lm_start_sym = ""; static char const *lm_end_sym = ""; static char *phone_file_name = 0; static char *mapFileName = 0; static char *dict_file_name = 0; static char *oov_dict_file_name = 0; static char *personal_dict_file_name = 0; static char *phrase_dict_file_name = 0; static char *noise_dict_file_name = 0; static char *hmm_dir = 0; static char *hmm_dir_list = 0; static char const *hmm_ext = "chmm"; static double hmm_smooth_min = 0.0000001; static char const *code1_ext = "ccode"; static char const *code2_ext = "d2code"; static char const *code3_ext = "p3code"; static char const *code4_ext = "xcode"; static int32 num_cb = 4; /* Number of code books */ static char *sendumpfile = 0; /* Senone probs dump file */ static int32 Use8BitSenProb = FALSE; /* TRUE=>use sen-probs compressed to 8bits */ static int32 UseDarpaStandardLM = TRUE; /* FALSE => Use Roni Rosenfeld LM */ static int32 UseBigramOnly = FALSE; /* Only use bigram even is trigram is avaiable */ static int32 UseWordPair = FALSE; /* Use word pair probs only */ static int32 use_left_context = TRUE; static float unigramWeight = 1.0; /* Unigram wieght */ static double transSmooth = 0.0001; /* Transition smoothing floor */ static double transWeight = 1.0; /* Transition Weight */ static int32 NoLangModel = TRUE; static int32 useBigHmmFiles = FALSE; /* use big hmms files for IO */ static int32 useCiTrans = TRUE; /* only ci transitions in hmms */ static int32 useCiPhonesOnly = FALSE; /* only ci phones */ static int32 useWDPhonesOnly = FALSE; /* only with in word phones */ static float silence_word_penalty; static float insertion_penalty; static float filler_word_penalty; static float language_weight; static int32 max_new_oov = 0; /* #new OOVs that can be added at run time */ static float oov_ugprob = -4.5; /* (Actually log10(ugprob)) of OOVs */ extern void unlimit(void); /* LM dump directory */ static char *kb_dump_dir = NULL; static char *startsym_file = NULL; /* For LISTEN project */ config_t kb_param[] = { /* * LongName, Documentation, Switch, TYPE, Address */ { "LmFile", "Language model file name", "-lmfn", STRING, (caddr_t) &lm_file_name }, { "LmControlFile", "Language model control file name", "-lmctlfn", STRING, (caddr_t) &lm_ctl_filename }, { "LmFTFile", "Language model (forward trigram) file name", "-lmftfn", STRING, (caddr_t) &lmft_file_name }, { "LmTagFile", "Language model tag file name", "-lmtagfn", STRING, (caddr_t) &lm_tag_file_name }, { "LmWordTagFile", "Language model word tag file name", "-lmwtagfn", STRING, (caddr_t) &lm_word_tags_file_name }, { "LmStartSym", "Langauge model start symbol", "-lmstartsym", STRING, (caddr_t) &lm_start_sym }, { "LmEndSym", "Langauge model end symbol", "-lmendsym", STRING, (caddr_t) &lm_end_sym }, { "UseDarpaStandardLM", "Use DARPA standard LM", "-useDarpaLM", BOOL, (caddr_t) &UseDarpaStandardLM }, { "UseBigramOnly", "Only use a bigram model", "-useBigramOnly", BOOL, (caddr_t) &UseBigramOnly }, { "UseWordPair", "Use word pair probabilities", "-usewordpair", BOOL, (caddr_t) &UseWordPair }, { "MapFile", "Distribution map", "-mapfn", STRING, (caddr_t) &mapFileName }, { "DictFile", "Dictionary file name", "-dictfn", STRING, (caddr_t) &dict_file_name }, { "OOVDictFile", "OOV Dictionary file name", "-oovdictfn", STRING, (caddr_t) &oov_dict_file_name }, { "PersonalDictFile", "Personal Dictionary file name", "-perdictfn", STRING, (caddr_t) &personal_dict_file_name }, { "PhraseDictFile", "Phrase dictionary file name", "-pdictfn", STRING, (caddr_t) &phrase_dict_file_name }, { "NoiseDictFile", "Noise Dictionary file name", "-ndictfn", STRING, (caddr_t) &noise_dict_file_name }, { "StartSymbolsFile", "Start symbols file name", "-startsymfn", STRING, (caddr_t) &startsym_file }, { "UseLeftContext", "Use the left context models", "-useleftcontext", BOOL, (caddr_t) &use_left_context }, { "PhoneFile", "Phone file name", "-phnfn", STRING, (caddr_t) &phone_file_name }, { "UseBigHmmFiles", "Use big hmm file format", "-usebighmm", BOOL, (caddr_t) &useBigHmmFiles }, { "UseCITransOnly", "Only use trans probs from CI phones", "-usecitrans", BOOL, (caddr_t) &useCiTrans }, { "UseCIPhonesOnly", "Only use CI phones", "-useciphones", BOOL, (caddr_t) &useCiPhonesOnly }, { "WordPhonesOnly", "Only use with in word phones", "-usewdphones", BOOL, (caddr_t) &useWDPhonesOnly }, { "HmmDirList", "Hmm Directory List", "-hmmdirlist", STRING, (caddr_t) &hmm_dir_list }, { "HmmDir", "Hmm Directory", "-hmmdir", STRING, (caddr_t) &hmm_dir }, { "HmmExt", "Hmm Extention", "-hmmext", STRING, (caddr_t) &hmm_ext }, { "Code1Ext", "Code1 Extention", "-code1ext", STRING, (caddr_t) &code1_ext }, { "Code2Ext", "Code2 Extention", "-code2ext", STRING, (caddr_t) &code2_ext }, { "Code3Ext", "Code3 Extention", "-code3ext", STRING, (caddr_t) &code3_ext }, { "Code4Ext", "Code4 Extention", "-code4ext", STRING, (caddr_t) &code4_ext }, { "NumCb", "Number of Codebooks", "-numcb", INT, (caddr_t) &num_cb }, { "HmmSmoothMin", "Hmm minimum output probability", "-hmmsm", DOUBLE, (caddr_t) &hmm_smooth_min }, { "TransWeight", "Arc transition weight", "-transwt", DOUBLE, (caddr_t) &transWeight }, { "TransSmooth", "Minimum arc transition probability", "-transsm,", DOUBLE, (caddr_t) &transSmooth }, { "Unigram Weight", "Unigram weight, 0.0 - 1.0", "-ugwt", FLOAT, (caddr_t) &unigramWeight }, { "Use8BitSenProb", "#bits to use for senone probs = 8", "-8bsen", BOOL, (caddr_t) &Use8BitSenProb }, { "InsertionPenalty", "Penalty for word transitions", "-inspen", FLOAT, (caddr_t) &insertion_penalty }, { "SilenceWordPenalty", "Penalty for silence word transitions", "-silpen", FLOAT, (caddr_t) &silence_word_penalty }, { "FillerWordPenalty", "Penalty for filler word transitions", "-fillpen", FLOAT, (caddr_t) &filler_word_penalty }, { "LanguageWeight", "Weighting on Language Probabilities", "-langwt", FLOAT, (caddr_t) &language_weight }, { "MaxNewOOV", "MAX New OOVs that can be added at run time", "-maxnewoov", INT, (caddr_t) &max_new_oov }, { "OOVUgProb", "OOV Unigram Log Prob", "-oovugprob", FLOAT, (caddr_t) &oov_ugprob }, { "KBDumpDirectory", "KB dump directory", "-kbdumpdir", STRING, (caddr_t) &kb_dump_dir }, { "SenoneProbDumpFile", "Senone Probs Dump File", "-sendumpfn", STRING, (caddr_t) &sendumpfile }, { 0,0,0,NOTYPE,0 } }; int32 num_alphabet = NUMOFCODEENTRIES; SMD *smds; /* Pointer to the smd's */ int32 numSmds; /* Number of SMDs allocated */ dictT *word_dict; static float phone_insertion_penalty; void kbAddGrammar(char const *fileName, char const *grammarName) { lmSetStartSym (lm_start_sym); lmSetEndSym (lm_end_sym); lm_read (fileName, grammarName, language_weight, unigramWeight, insertion_penalty); } static void kb_init_lmclass_dictwid (lmclass_t cl) { lmclass_word_t w; int32 wid; for (w = lmclass_firstword(cl); lmclass_isword(w); w = lmclass_nextword(cl, w)) { wid = kb_get_word_id (lmclass_getword (w)); lmclass_set_dictwid (w, wid); } } void kb (int argc, char *argv[], float ip, /* word insertion penalty */ float lw, /* langauge weight */ float pip) /* phone insertion penalty */ { char *pname = argv[0]; char hmm_file_name[256]; int32 num_phones, num_ci_phones; int32 i, use_darpa_lm; /* FIXME: This is evil. But if we do it, let's prototype it somewhere, OK? */ unlimit (); /* Remove memory size limits */ language_weight = lw; insertion_penalty = ip; phone_insertion_penalty = pip; pconf (argc, argv, kb_param, 0, 0, 0); if ((phone_file_name == 0) || (dict_file_name == 0)) pusage (pname, (Config_t *)kb_param); E_INFO("%s(%d): Reading phone file [%s]\n", __FILE__, __LINE__, phone_file_name); if (phone_read (phone_file_name)) exit (-1); if (useWDPhonesOnly) phone_add_diphones(); num_ci_phones = phoneCiCount(); /* Read the distribution map file */ E_INFO("%s(%d): Reading map file [%s]\n", __FILE__, __LINE__, mapFileName); read_map (mapFileName, TRUE /* useCiTrans compress */); E_INFO("%s(%d): Reading dict file [%s]\n", __FILE__, __LINE__, dict_file_name); word_dict = dict_new (); if (dict_read (word_dict, dict_file_name, phrase_dict_file_name, noise_dict_file_name, !useWDPhonesOnly)) exit (-1); use_darpa_lm = TRUE; if (use_darpa_lm) { lmSetStartSym (lm_start_sym); lmSetEndSym (lm_end_sym); /* * Read control file describing multiple LMs, if specified. * File format (optional stuff is indicated by enclosing in []): * * [{ LMClassFileName LMClassFilename ... }] * TrigramLMFileName LMName [{ LMClassName LMClassName ... }] * TrigramLMFileName LMName [{ LMClassName LMClassName ... }] * ... * (There should be whitespace around the { and } delimiters.) * * This is an extension of the older format that had only TrigramLMFilenName * and LMName pairs. The new format allows a set of LMClass files to be read * in and referred to by the trigram LMs. (Incidentally, if one wants to use * LM classes in a trigram LM, one MUST use the -lmctlfn flag. It is not * possible to read in a class-based trigram LM using the -lmfn flag.) * * No "comments" allowed in this file. */ if (lm_ctl_filename) { FILE *ctlfp; char lmfile[4096], lmname[4096], str[4096]; lmclass_set_t lmclass_set; lmclass_t *lmclass, cl; int32 n_lmclass, n_lmclass_used; lmclass_set = lmclass_newset(); E_INFO("Reading LM control file '%s'\n", lm_ctl_filename); ctlfp = CM_fopen (lm_ctl_filename, "r"); if (fscanf (ctlfp, "%s", str) == 1) { if (strcmp (str, "{") == 0) { /* Load LMclass files */ while ((fscanf (ctlfp, "%s", str) == 1) && (strcmp (str, "}") != 0)) lmclass_set = lmclass_loadfile (lmclass_set, str); if (strcmp (str, "}") != 0) E_FATAL("Unexpected EOF(%s)\n", lm_ctl_filename); if (fscanf (ctlfp, "%s", str) != 1) str[0] = '\0'; } } else str[0] = '\0'; /* Fill in dictionary word id information for each LMclass word */ for (cl = lmclass_firstclass(lmclass_set); lmclass_isclass(cl); cl = lmclass_nextclass(lmclass_set, cl)) { kb_init_lmclass_dictwid (cl); } /* At this point if str[0] != '\0', we have an LM filename */ n_lmclass = lmclass_get_nclass(lmclass_set); lmclass = (lmclass_t *) CM_calloc (n_lmclass, sizeof(lmclass_t)); /* Read in one LM at a time */ while (str[0] != '\0') { strcpy (lmfile, str); if (fscanf (ctlfp, "%s", lmname) != 1) E_FATAL("LMname missing after LMFileName '%s'\n", lmfile); n_lmclass_used = 0; if (fscanf (ctlfp, "%s", str) == 1) { if (strcmp (str, "{") == 0) { /* LM uses classes; read their names */ while ((fscanf (ctlfp, "%s", str) == 1) && (strcmp (str, "}") != 0)) { if (n_lmclass_used >= n_lmclass) E_FATAL("Too many LM classes specified for '%s'\n", lmfile); lmclass[n_lmclass_used] = lmclass_get_lmclass (lmclass_set, str); if (! (lmclass_isclass(lmclass[n_lmclass_used]))) E_FATAL("LM class '%s' not found\n", str); n_lmclass_used++; } if (strcmp (str, "}") != 0) E_FATAL("Unexpected EOF(%s)\n", lm_ctl_filename); if (fscanf (ctlfp, "%s", str) != 1) str[0] = '\0'; } } else str[0] = '\0'; if (n_lmclass_used > 0) lm_read_clm (lmfile, lmname, language_weight, unigramWeight, insertion_penalty, lmclass, n_lmclass_used); else lm_read (lmfile, lmname, language_weight, unigramWeight, insertion_penalty); } fclose (ctlfp); NoLangModel = FALSE; } /* Read "base" LM file, if specified */ if (lm_file_name) { lmSetStartSym (lm_start_sym); lmSetEndSym (lm_end_sym); lm_read (lm_file_name, "", language_weight, unigramWeight, insertion_penalty); /* Make initial OOV list known to this base LM */ lm_init_oov (); NoLangModel = FALSE; } #ifdef USE_ILM /* Init ILM module (non-std-Darpa LM, eg ug/bg cache LM) */ ilm_init (); #endif } #if 0 /* Compute the phrase lm probabilities */ computePhraseLMProbs (); #endif num_phones = phone_count (); numSmds = hmm_num_sseq(); smds = (SMD *) CM_calloc (numSmds, sizeof (SMD)); /* * Read the hmm's into the SMD structures */ if (useBigHmmFiles) { for (i = 0; i < num_ci_phones; i++) { sprintf (hmm_file_name, "%s.%s", phone_from_id (i), hmm_ext); hmm_tied_read_big_bin (hmm_dir_list, hmm_file_name, smds, transSmooth, NUMOFCODEENTRIES, TRUE, transWeight); } } else { for (i = 0; i < num_phones; i++) { if ((!useCiTrans) || (phone_id_to_base_id(i) == i)) { sprintf (hmm_file_name, "%s.%s", phone_from_id (i), hmm_ext); hmm_tied_read_bin (hmm_dir_list, hmm_file_name, &smds[hmm_pid2sid(i)], transSmooth, NUMOFCODEENTRIES, TRUE, transWeight); } } } /* * Use Ci transitions ? */ if (useCiTrans) { for (i = 0; i < num_phones; i++) { if (hmm_pid2sid(phone_id_to_base_id(i)) != hmm_pid2sid(i)) { /* * Just make a copy of the CI phone transitions */ memcpy (&smds[hmm_pid2sid(i)], &smds[hmm_pid2sid(phone_id_to_base_id(i))], sizeof (SMD)); } } } /* * Read the distributions */ read_dists (hmm_dir, code1_ext, code2_ext, code3_ext, code4_ext, NUMOFCODEENTRIES, hmm_smooth_min, useCiPhonesOnly); if (Use8BitSenProb) SCVQSetSenoneCompression (8); /* * Map the distributions to the correct locations */ remap (smds); } #if 0 computePhraseLMProbs () { int32 wcnt = dict_count (word_dict); int32 i; char stmp[256]; char *p, *q, *r; for (i = 0; i < wcnt; i++) { int32 prob = 0; /* * Is this a phrase word ? */ if (word_dict->dict_list[i]->wid != word_dict->dict_list[i]->fwid) { strcpy (stmp, word_dict->dict_list[i]->word); q = stmp; p = index (q, '_'); if (p) { *p = '\0'; p++; } while (q && p) { r = index (p, '_'); if (r) { *r = '\0'; r++; } /* * Look out for alternate pronuciations in phrases and strip the * modifier */ { char *lp = rindex(p, '('); char *rp = rindex(p, ')'); if (lp && rp) { *lp = '\0'; } } prob += lmLogProbability (langModel, q, p); q = p; p = r; } E_INFO ("Phrase log prob [%20s] = %8d\n", word_dict->dict_list[i]->word, prob); word_dict->dict_list[i]->lm_pprob = prob; } } } #endif extern int32 totalDists; int32 kb_get_total_dists (void) { return totalDists; } int32 kb_get_aw_tprob (void) /*------------------------------------------------------------* * Return the All_Word Transition Probability */ { return (language_weight * (LOG(1.0 / word_dict->dict_entry_count) - LOG(insertion_penalty))); } int32 kb_get_num_models (void) /*------------------------------------------------------------* * Return the number of unique hmm models. */ { return hmm_num_sseq(); } int32 kb_get_num_dist (void) /*------------------------------------------------------------* * Return the number of distributions. */ { return 5; } int32 kb_get_num_model_instances (void) /*------------------------------------------------------------* * Return the number of hmm model instances. */ { register int32 i, tot = 0; for (i = 0; i < word_dict->dict_entry_count; i++) { tot += word_dict->dict_list[i]->len + 10; } return tot; } int32 kb_get_num_words (void) /*------------------------------------------------------------* * Return the number of words. */ { return word_dict->dict_entry_count; } SMD *kb_get_models (void) /*------------------------------------------------------------* * Return the a pointer to the hmm models */ { return smds; } char **kb_get_phone_list (void) /*------------------------------------------------------------* * Return the phone list. */ { return ((char **) phoneList()->list); } extern int32 *Out_Prob1; extern int32 *Out_Prob2; extern int32 *Out_Prob3; extern int32 *Out_Prob4; typedef struct { int32 **prob; /* 2-D array, #codewords x 256 */ unsigned char **id; /* 2-D array, #codewords x #senones */ } OPDF_8BIT_T; extern OPDF_8BIT_T out_prob_8b[]; int32 *kb_get_codebook_0_dist (void) /*------------------------------------------------------------* * Return the permutted codebook 0 distributions. */ { /* UGLY coercion of various pointer types to (int32 *) */ if (Use8BitSenProb) return ((int32 *) &(out_prob_8b[0])); return (Out_Prob1); } int32 *kb_get_codebook_1_dist (void) /*------------------------------------------------------------* * Return the permutted codebook 1 distributions. */ { /* UGLY coercion of various pointer types to (int32 *) */ if (Use8BitSenProb) return ((int32 *) &(out_prob_8b[1])); return (Out_Prob2); } int32 *kb_get_codebook_2_dist (void) /*------------------------------------------------------------* * Return the permutted codebook 2 distributions. */ { /* UGLY coercion of various pointer types to (int32 *) */ if (Use8BitSenProb) return ((int32 *) &(out_prob_8b[2])); return (Out_Prob3); } int32 *kb_get_codebook_3_dist (void) /*------------------------------------------------------------* * Return the permutted codebook 3 distributions. */ { /* UGLY coercion of various pointer types to (int32 *) */ if (Use8BitSenProb) return ((int32 *) &(out_prob_8b[3])); return (Out_Prob4); } int32 kb_get_dist_prob_bytes (void) /*------------------------------------------------------------* * Return kbh.dist_prob_bytes */ { return 4; } int32 kb_get_word_id (char const *word) { #if 0 static char const *rname = "kb_get_word_id"; register int32 i, id = -1; for (i = 0; i < word_dict->dict_entry_count; i++) { if (mystrcasecmp (word, word_dict->dict_list[i]->word) == 0) { id = i; break; } } if (id == -1) { E_WARN(stdout, "%s: Couldn't find word \"%s\"\n", rname, word); } return id; #else return (dict_to_id (word_dict, word)); #endif } char *kb_get_word_str (int32 wid) { return (word_dict->dict_list[wid]->word); } dictT *kb_get_word_dict (void) { return word_dict; } int32 kb_get_num_codebooks (void) { return 4; } int kb_get_darpa_lm_flag (void) { return UseDarpaStandardLM; } int kb_get_no_lm_flag (void) { return NoLangModel; } char const *kb_get_lm_start_sym(void) { return lm_start_sym; } char const *kb_get_lm_end_sym(void) { return lm_end_sym; } char *kb_get_dump_dir(void) { return kb_dump_dir; } char *kb_get_senprob_dump_file(void) { return sendumpfile; } int32 kb_get_senprob_size(void) { if (Use8BitSenProb) return 8; return 32; } /* For LISTEN project */ char *kb_get_startsym_file (void) { return startsym_file; } char *kb_get_oovdic(void) { return oov_dict_file_name; } char *kb_get_personaldic(void) { return personal_dict_file_name; } double kb_get_oov_ugprob (void) { return (oov_ugprob); } int32 kb_get_max_new_oov (void) { return (max_new_oov); } int32 dict_maxsize (void) { return (word_dict->dict.size_hint); }