;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; ;;; Language Technologies Institute ;;; ;;; Carnegie Mellon University ;;; ;;; Copyright (c) 1999 ;;; ;;; All Rights Reserved. ;;; ;;; ;;; ;;; Permission is hereby granted, free of charge, to use and distribute ;;; ;;; this software and its documentation without restriction, including ;;; ;;; without limitation the rights to use, copy, modify, merge, publish, ;;; ;;; distribute, sublicense, and/or sell copies of this work, and to ;;; ;;; permit persons to whom this work is furnished to do so, subject to ;;; ;;; the following conditions: ;;; ;;; 1. The code must retain the above copyright notice, this list of ;;; ;;; conditions and the following disclaimer. ;;; ;;; 2. Any modifications must be clearly marked as such. ;;; ;;; 3. Original authors' names are not deleted. ;;; ;;; 4. The authors' names are not used to endorse or promote products ;;; ;;; derived from this software without specific prior written ;;; ;;; permission. ;;; ;;; ;;; ;;; CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK ;;; ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;; ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;; ;;; SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE ;;; ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;; ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;; ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;; ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;; ;;; THIS SOFTWARE. 
;;;                                                                      ;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;             Author: Alan W Black (awb@cs.cmu.edu)                    ;;;
;;;               Date: December 1999                                    ;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;                                                                      ;;;
;;; Generate a C compilable lexicon file from a Festival lexicon         ;;;
;;;                                                                      ;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(define (lextoC name ifile odir)
  "(lextoC name ifile odir)
Convert a (Festival) compiled lexicon file IFILE to a format suitable
for C to compile.  Two files are written into directory ODIR:
NAME_lex_entry.c (entry table, entry count and phone table) and
NAME_lex_phone.c (the packed phone byte stream)."
  (let ((ofde (fopen (path-append odir (string-append name "_lex_entry.c")) "w"))
        (ofdp (fopen (path-append odir (string-append name "_lex_phone.c")) "w"))
        (ifd (fopen ifile "r"))
        (phone_table))
    ;; Preamble of the entry file
    (format ofde "/*******************************************************/\n")
    (format ofde "/** Autogenerated lexicon entry file from %s */\n" name)
    (format ofde "/*******************************************************/\n")
    (format ofde "\n")
    (format ofde "#include \"cst_string.h\"\n\n")
    (format ofde "#include \"cst_lexicon.h\"\n\n")
    (format ofde "extern const unsigned char %s_lex_phones[];\n" name)
    (format ofde "const lexicon_entry %s_lex_entry[] = \n" name)
    (format ofde "{\n")

    ;; Preamble of the phones file
    (format ofdp "/*******************************************************/\n")
    (format ofdp "/** Autogenerated lexicon phones file from %s */\n" name)
    (format ofdp "/*******************************************************/\n")
    (format ofdp "\n")
    (format ofdp "#include \"cst_lexicon.h\"\n")
    (format ofdp "const unsigned char %s_lex_phones[] = \n" name)
    (format ofdp "{\n")
    (format ofdp " ")

    ;; Body of both tables; the return value is the ordered list of
    ;; phone names whose positions are the byte codes that were written.
    (set! phone_table (l2C_dump_entries ifd ofde ofdp))

    ;; Terminators for both tables
    (format ofde" { NULL, 0 } \n")
    (format ofde"};\n")
    (format ofde "\n")
    (format ofdp" 0 \n")
    (format ofdp"};\n")
    (format ofdp "\n")

    ;; Number of entries (lex_num_entries is set by l2C_dump_entries)
    (format ofde "\n")
    (format ofde "const int %s_num_entries = %d;\n" name lex_num_entries)
    (format ofde "\n")

    ;; The phone table (bytes to phone names)
    (format ofde "const char * const %s_phone_table[%d] = \n"
            name (+ 1 (length phone_table)))
    (format ofde "{\n")
    (mapcar
     (lambda (p) (format ofde " \"%s\",\n" p))
     phone_table)
    (format ofde " NULL\n")
    (format ofde "};\n")

;    ;; The register function
;    (format ofde "\n")
;    (format ofde "void register_lex_%s()\n" name)
;    (format ofde "{\n")
;    (format ofde " lexicon *lex = new_lexicon();\n")
;    (format ofde " lex->name = cst_strdup(\"%s\");\n" name)
;    (format ofde " lex->num_entries = %s_num_entries;\n" name)
;    (format ofde " lex->entry_index = %s_lex_entry;\n" name)
;    (format ofde " lex->phones = %s_lex_phones;\n" name)
;    (format ofde " lex->phone_table = %s_phone_table;\n" name)
;    (format ofde " lexicon_register(lex);\n")
;    (format ofde "}\n")
    (format ofde "\n")

    ;; Release every descriptor opened above, including the input file.
    (fclose ifd)
    (fclose ofde)
    (fclose ofdp)
    ))

(define (l2C_dump_entries ifd ofde ofdp)
  "(l2C_dump_entries ifd ofde ofdp)
Dump lexical entries in packed format where a single letter pos is
prepended to the word form and phones are packed into an unsigned char*
where each phone (+ stress) is assigned a byte code.  An index into this
is added to the lexical entry.  The byte code table is returned as an
ordered list (position in the list is the byte code).  As a side effect
the global lex_num_entries is set to the number of entries written."
  (let ((phone_table (list "_epsilon_"))
        (entry)
        (entry_count 0)
        (pos)
        (phones)
        (pcount 0))
    (if (not (string-equal "MNCL" (readfp ifd)))
        (error "L2C: input file is not a compiled lexicon\n"))
    (while (not (equal? (set! entry (readfp ifd)) (eof-val)))
      ;; First character of the part of speech, or "0" when there is none
      (if (not (car (cdr entry)))
          (set! pos "0")
          (set! pos (substring (string-append (car (cdr entry))) 0 1)))
      ;; Flat phone list for this entry; computed once and reused below
      (set! phones (l2C_phonetize (car (cdr (cdr entry)))))
      (format t "entry: %l\n" entry)
      ;; Keep the entry when it has a pos, is shorter than 5 characters,
      ;; or its fourth field differs from the phonetized pronunciation.
      (if (or (car (cdr entry))
              (< (length (car entry)) 5)
              (not (equal? (car (cdr (cdr (cdr entry)))) phones)))
          (begin
            ;; Lexical entry: pos letter + word, and offset into phones
            (set! entry_count (+ 1 entry_count))
            (format ofde " { \"%s%s\", %d },\n" pos (car entry) pcount)
            ;; Phone list
            (mapcar
             (lambda (p)
               (set! pcount (+ 1 pcount))
               (format ofdp "%3d, " (l2C_phone_index p 0 phone_table)))
             phones)
            ;; Zero byte terminating this entry's phones
            (set! pcount (+ 1 pcount))
            (format ofdp " 0, ")
            (format ofdp " /* \"%s\" %d */\n " (car entry) pcount))
          (format t " skipped\n")))
    (set! lex_num_entries entry_count) ;; shouldn't be a global
    phone_table))

(define (l2C_phone_index p n table)
  "(l2C_phone_index p n table)
Return the position of phone P in TABLE, counting from N for the first
element of TABLE.  If P is not present it is destructively appended to
TABLE and its new position is returned."
  (cond
   ((string-equal p (car table))
    n)
   ((not (cdr table))  ;; new p
    (set-cdr! table (list p))
    (+ 1 n))
   (t
    (l2C_phone_index p (+ 1 n) (cdr table)))))

;; Should be a better way to do this
(set! vowels
      '(
        ;; radio (CMULEX)
        aa ae ah ao aw ax axr eh el em en er ey ih iy ow oy
        ;; mrpa (OALD)
        uh e a o i u ii uu oo aa @@ ai ei oi au ou e@ i@ u@ @
        ;; Dialect independent lexicon
        @ @@ @@r @r a aa ah ai ao ao ar au e ei eir er i i@ ii ir iy o o oi oo oou oou or ou our ow u uh ur uu
        ;; ogi_worldbet
        i: I E @ u U ^ & > A 3r ei aI >i iU aU oU
        ))

(define (is_a_vowel p)
  "(is_a_vowel p)
Non-nil if phone P appears in the global list vowels."
;  (if (> (car (cdr festival_version_number)) 4)
;      (string-equal "+" (Param.get (format nil "phoneset.%s.vc" p)))
;      (string-equal "+"
;       (car (cdr
;        (assoc_string p
;         (car (cdr
;          (assoc 'phones
;           (PhoneSet.description '(phones)))))))))
  (member_string p vowels)
;  )
  )

(define (l2C_phonetize syls)
  "(l2C_phonetize syls)
Return simple list of atomic phone/stress values.  Each syllable in SYLS
is (PHONES STRESS); a vowel in a syllable whose stress is 1 has the
stress value appended to its name, all other phones are left unchanged."
  (apply
   append
   (mapcar
    (lambda (syl)
      (mapcar
       (lambda (p)
         (if (and (is_a_vowel p)
                  (equal? 1 (car (cdr syl))))
             (intern (string-append p (car (cdr syl))))
             p))
       (car syl)))
    syls)))

(define (L2C_make_phone_index_tree name ifile ofile)
  "(L2C_make_phone_index_tree name ifile ofile)
Build a regular grammar for the words in this lexicon in order to
build a more efficient representation of the pronunciations.  Each
entry of the compiled lexicon IFILE is written to OFILE as the reversed
letters of the word, a pos marker, and the phonetized pronunciation."
  (let ((ifd (fopen ifile "r"))
        (ofd (fopen ofile "w"))
        entry)
    (if (not (string-equal "MNCL" (readfp ifd)))
        (begin
          ;; Do not leave descriptors open when bailing out
          (fclose ifd)
          (fclose ofd)
          (error "L2C: input file is not a compiled lexicon\n")))
    (format ofd "(TreeLexicon\n")
    (format ofd " %s\n" name)
    (format ofd " nil\n")
    (format ofd " (\n")
    (while (not (equal? (set! entry (readfp ifd)) (eof-val)))
      (format ofd " ( ")
      ;; Letters of the word, last letter first
      (mapcar
       (lambda (l) (format ofd "%s " l))
       (reverse (symbolexplode (car entry))))
      ;; Part of speech marker, or 0 when the entry has none
      (if (car (cdr entry))
          (format ofd "pos_%s " (car (cdr entry)))
          (format ofd "0 "))
      (mapcar
       (lambda (l) (format ofd "%s " l))
       (l2C_phonetize (car (cdr (cdr entry)))))
      (format ofd " -> W 1.0)\n"))
    (format ofd " ))\n")
    ;; Close both files so the output is flushed before returning.
    (fclose ifd)
    (fclose ofd)
    ))

(provide 'make_lex)