/* ====================================================================
 * Copyright (c) 1999-2001 Carnegie Mellon University.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer. 
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * This work was supported in part by funding from the Defense Advanced 
 * Research Projects Agency and the National Science Foundation of the 
 * United States of America, and the CMU Sphinx Speech Consortium.
 *
 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ====================================================================
 *
 */
/*
 * time_align.h
 */

/*
 * Takes a beam width `bw` for the time aligner.
 * NOTE(review): the definition is not in this header; presumably this stores
 * the pruning beam used by later alignment calls -- confirm the expected
 * range/units of bw against the implementation.
 */
void time_align_set_beam_width(double bw);

/*
 * Initialization entry point of the time-alignment module; takes no
 * arguments.
 * NOTE(review): the meaning of the int return value (status code?) is not
 * visible from this header -- confirm against the implementation.
 */
int time_align_init(void);

/*
 * Hands the aligner five float buffers plus an int count.
 * NOTE(review): presumably c/d/d_80/p/dd are the per-frame feature streams
 * and n_f is the number of frames -- the layout and ownership of the
 * buffers are not stated here; confirm against the implementation.
 */
void time_align_set_input(float *c,
			  float *d,
			  float *d_80,
			  float *p,
			  float *dd,
			  int n_f);

/*
 * Runs an alignment request described by four strings.
 * NOTE(review): presumably utt names the utterance, word_seq is the word
 * string to align, and left_word/right_word are the context words on either
 * side -- confirm.  word_seq is the only non-const string, so the callee may
 * write to it; callers should not pass a string literal until that is
 * verified.  The meaning of the int return value is not visible here.
 */
int time_align_word_sequence(char const * utt,
			     char const *left_word,
			     char *word_seq,
			     char const *right_word);

/*
 * Returns segmentation output through two out-parameters: *seg (an
 * unsigned short buffer) and *seg_cnt (an int count).
 * NOTE(review): who owns the buffer stored in *seg, and whether the int
 * return value uses the NO_SEGMENTATION / NO_MEMORY codes defined below,
 * cannot be determined from this header -- confirm.
 */
int time_align_seg_output(unsigned short **seg,
			  int *seg_cnt);
/*
 * Returns a char pointer, by its name the best word string found by the
 * aligner.
 * NOTE(review): ownership/lifetime of the returned string and whether it can
 * be NULL are not stated here -- confirm before freeing or caching it.
 */
char *time_align_best_word_string(void);

/*
 * Generic "nothing here" sentinels.
 *
 * Every negative replacement list in this block is parenthesized so that the
 * expansion is always a single operand, whatever operators surround the
 * macro at the point of use.
 * NOTE(review): judging by the names these mark a missing id, frame,
 * evaluation and back pointer respectively -- confirm against the users.
 */
#define NONE		(-1)
#define NO_ID		(-1)
#define NO_FRAME	(-1)
#define NO_EVAL		(-1)
#define NO_BP		(-1)

/* NOTE(review): these look like failure/status codes -- confirm which
   functions return them. */
#define NO_SEGMENTATION	(-1)
#define NO_MEMORY	(-2)

/* NOTE(review): presumably special phone / phone-sequence markers --
   confirm against the users. */
#define FILLER_PHONE_SEQ (-1)
#define UNDEFINED	(-1)
#define INTERNAL_PHONE	(-2)

/* Adjacency codes: +1 for right, -1 for left, 0 for not adjacent. */
#define RIGHT_ADJACENT	1
#define LEFT_ADJACENT	(-1)
#define NOT_ADJACENT	0

/* Length of the per-state arrays in DYNMODEL_T.  HMM_LAST_STATE is not
   defined in this header and must be visible wherever NODE_CNT is expanded. */
#define NODE_CNT	(HMM_LAST_STATE+1)
/* NOTE(review): presumably limits on non-silence / silence right contexts
   -- confirm. */
#define MAX_NON_SIL_RC	10
#define MAX_SIL_RC	10

#define MAX_NODES	1024

#define MAX_COMPOUND_LEN	4

/*
 * One compound-word entry: the dictionary identity of the compound word
 * together with the string it is matched against in the input word sequence.
 */
typedef struct compound_word_struct {
    /* dictionary word id of the compound word */
    int word_id;

    /* dictionary word string of the compound word */
    const char *word_str;

    /* string to match against the input word sequence */
    const char *match_str;

    /* number of words in the match_str component */
    int word_cnt;
} COMPOUND_WORD_T;

/*
 * Score and back-pointer bookkeeping for a single model, plus the list of
 * models that follow it.  Each per-state array holds NODE_CNT entries.
 */
typedef struct dynmodel_struct {
    /* the best score over all states in the model */
    int model_best_score;

    /* senone sequence */
    int sseq_id;

    /* best acoustic score to each state */
    int score[NODE_CNT];

    /* word back pointer table index per state */
    int wbp[NODE_CNT];

    /* phone back pointer table index per state */
    int pbp[NODE_CNT];

    /* state back pointer table index per state */
    int sbp[NODE_CNT];

    /* number of successors to this model */
    int next_cnt;

    /* the indices of the successors to this model */
    int *next;
} DYNMODEL_T;

/*
 * One entry of a back pointer table, recorded when a word, phone or state
 * is exited.  Entries are chained through `prev`.
 */
typedef struct {
    /* an identifier of the word/phone/state exited */
    int id;

    /* the time frame when the word/phone/state was exited */
    int end_frame;

    /* the score of the path at the point when the word/phone/state was
       exited */
    int score;

    /* the index into the back pointer table of the prior back pointer */
    int prev;
} BACK_POINTER_T;

/*
 * One time segment of an alignment: which word/phone/state it is, the frames
 * it spans, and its acoustic score.
 */
typedef struct {
    /* a string representation of the word/phone associated w/ the time
       segment */
    const char *name;

    /* integer representation of the word/phone/state segment */
    int id;

    /* the frame when the word/phone was entered */
    int start;

    /* the frame when the word/phone was exited */
    int end;

    /* the acoustic score associated with the time segment.  Computed as the
       difference of the back pointer score of the word/phone end and the
       prior back pointer score */
    int score;
} SEGMENT_T;

/* state segmentation will be handled w/ a simpler scheme since there is a one-to-one
   correspondence between states and frames.  So the start/end information need not be
   kept. */
    

/* Granularity of segmentation requested from time_align_get_segmentation(). */
typedef enum {
    WORD_SEGMENTATION  = 0,	/* segment by word */
    PHONE_SEGMENTATION = 1,	/* segment by phone */
    STATE_SEGMENTATION = 2	/* segment by state */
} seg_kind_t;

/*
 * Returns a pointer to SEGMENT_T for the requested kind of segmentation
 * (word, phone or state) and reports a count through *seg_cnt.
 * NOTE(review): presumably the result is an array of *seg_cnt segments; its
 * ownership/lifetime and the failure behaviour (NULL return? negative
 * count?) are not visible from this header -- confirm against the
 * implementation.
 */
SEGMENT_T *time_align_get_segmentation(seg_kind_t kind, int *seg_cnt);