;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; ;;; Language Technologies Institute ;;; ;;; Carnegie Mellon University ;;; ;;; Copyright (c) 1999 ;;; ;;; All Rights Reserved. ;;; ;;; ;;; ;;; Permission is hereby granted, free of charge, to use and distribute ;;; ;;; this software and its documentation without restriction, including ;;; ;;; without limitation the rights to use, copy, modify, merge, publish, ;;; ;;; distribute, sublicense, and/or sell copies of this work, and to ;;; ;;; permit persons to whom this work is furnished to do so, subject to ;;; ;;; the following conditions: ;;; ;;; 1. The code must retain the above copyright notice, this list of ;;; ;;; conditions and the following disclaimer. ;;; ;;; 2. Any modifications must be clearly marked as such. ;;; ;;; 3. Original authors' names are not deleted. ;;; ;;; 4. The authors' names are not used to endorse or promote products ;;; ;;; derived from this software without specific prior written ;;; ;;; permission. ;;; ;;; ;;; ;;; CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK ;;; ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;; ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;; ;;; SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE ;;; ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;; ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;; ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;; ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;; ;;; THIS SOFTWARE. ;;; ;;; ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Author: Alan W Black (awb@cs.cmu.edu) ;;; ;;; Date: December 1999 ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; ;;; Generate a C compilable lts rules. ;;; ;;; ;;; ;;; Two modes, from decision graphs as wfsts or from CART trees ;;; ;;; ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; These are preordained by the LTS building process (set! lts_context_window_size 4) (set! lts_context_extra_feats 1) (define (ltsregextoC name idir odir) "(ltsregextoC name idir odir) Converts its wfsts to a C compilation structure for flite. Assumes $idir/[a-z].tree.wfst to compile from." (let ((ofde (fopen (path-append odir (string-append name "_lts_rules.c")) "w")) (ofdh (fopen (path-append odir (string-append name "_lts_rules.h")) "w")) (ifd) (rule_index nil)) (set! lts_pos 0) (set! phone_table (list "epsilon")) (format ofde "/*******************************************************/\n") (format ofde "/** Autogenerated lts rules (regex) for %s */\n" name) (format ofde "/** from %s */\n" idir) (format ofde "/*******************************************************/\n") (format ofde "\n") (format ofde "#include \"cst_string.h\"\n") (format ofde "#include \"cst_lts.h\"\n") (format ofde "#include \"cst_lexicon.h\"\n") (format ofde "#include \"%s_lts_rules.h\"\n\n" name) (format ofde "extern const cst_lts_rules %s_lts_rules;\n\n" name) (format ofde "static const cst_lts_model %s_lts_model[] = \n" name) (format ofde "{\n") (mapcar (lambda (l) (let ((ifd (fopen (path-append idir (string-append l ".tree.wfst")) "r"))) (format t "doing: %s\n" l) (format ofde " /** letter %s **/\n" l) (format ofdh " /** letter %s **/\n" l) (set! rule_index (cons (list l lts_pos) rule_index)) (set! lts_pos (dump_lts_wfst l ifd ofde ofdh lts_pos)) (fclose ifd))) '("a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z") ) (format ofde " 0, 0, 0,0, 0,0\n") (format ofde "};\n") ;; The phone table (bytes to phone names) (format ofde "\n") (format ofde "static const char * const %s_lts_phone_table[%d] = \n" name (+ 1 (length phone_table))) (format ofde "{\n") (mapcar (lambda (p) (format ofde " \"%s\",\n" p)) phone_table) (format ofde " NULL\n") (format ofde "};\n") ;; Which rule starts where (format ofde "\n") (format ofde "static const cst_lts_addr %s_lts_letter_index[27] = \n" name) (format ofde "{\n") (mapcar (lambda (p) (format ofde " %d, /* %s */\n" (car (cdr p)) (car p))) (reverse rule_index)) (format ofde " 0\n") (format ofde "};\n") (format ofde "\n") (format ofde "const cst_lts_rules %s_lts_rules = {\n" name) (format ofde " \"%s\",\n" name) (format ofde " %s_lts_letter_index,\n" name) (format ofde " %s_lts_model,\n" name) (format ofde " %s_lts_phone_table,\n" name) (format ofde " 4, /* context_window_size */\n") (format ofde " 1 /* context_extra_feats */\n") (format ofde "};\n") (format ofde "\n") (fclose ofde) (fclose ofdh) )) (define (dump_lts_wfst l ifd ofde ofdh lts_pos) "(dump_lts_wfst ifd ofde ofdh lts_pos) Dump the WFST as a byte table to ifd. Jumps are dumped as #define's to ofdh so forward references work. lts_pos is the rule position. Each state is saves as feature value true_addr false_addr Feature and value are single bytes, which addrs are double bytes." (let ((state)) ;; Skip WFST header (while (not (string-equal (set! state (readfp ifd)) "EST_Header_End")) (if (equal? state (eof-val)) (error "eof in lts regex file"))) (while (not (equal? (set! state (readfp ifd)) (eof-val))) (format ofdh "#define LTS_STATE_%s_%d %s\n" l (car (car state)) (lts_bytify lts_pos)) (cond ((string-equal "final" (car (cdr (car state)))) (set! lts_pos (- lts_pos 1)) t) ;; do nothing ((string-matches (car (car (cdr state))) ".*_.*") (format ofde " %s, '%s', %s , %s , \n" (lts_feat (car (car (cdr state)))) (lts_val (car (car (cdr state)))) (format nil "LTS_STATE_%s_%d" l (car (cdr (cdr (car (cdr (cdr state))))))) (format nil "LTS_STATE_%s_%d" l (car (cdr (cdr (car (cdr state)))))))) (t ;; its a letter output state (format ofde " 255, %s, 0,0 , 0,0 , \n" (lts_phone (car (car (cdr state))) 0 phone_table)))) (set! lts_pos (+ 1 lts_pos))) lts_pos)) (define (lts_feat trans) "(lts_feat trans) Returns the feature number represented in this transition name." (let ((fname (substring trans 5 (- (length trans) 11)))) (cond ((string-equal fname "p.p.p.p.name") 0) ((string-equal fname "p.p.p.name") 1) ((string-equal fname "p.p.name") 2) ((string-equal fname "p.name") 3) ((string-equal fname "n.name") 4) ((string-equal fname "n.n.name") 5) ((string-equal fname "n.n.n.name") 6) ((string-equal fname "n.n.n.n.name") 7) (t (error (format nil "ltsregex2C: unknown feat %s\n" trans )))))) (define (lts_val trans) "(lts_val trans) The letter being tested." (substring trans (- (length trans) 2) 1)) (define (lts_phone p n table) (cond ((string-equal p (car table)) n) ((not (cdr table)) ;; new p (set-cdr! table (list p)) (+ 1 n)) (t (lts_phone p (+ 1 n) (cdr table))))) (define (lts_bytify n) "(lts_bytify n) Return this short as a two byte comma separated string." (let ((xx (format nil "%04x" n))) ;; This is unfortunately byte order specific (format nil "0x%s,0x%s" (substring xx 2 2) (substring xx 0 2)))) (provide 'make_lts)