/*
  File name: pau.h
  Created by: Ljubomir Buturovic
  Created: 02/18/2002
  Purpose: declarations and macros for assorted utilities in PCP.
*/

/*
  Copyright 2004 Ljubomir J. Buturovic

  Permission is hereby granted, free of charge, to any person
  obtaining a copy of this software and associated documentation files
  (the "Software"), to deal in the Software without restriction,
  including without limitation the rights to use, copy, modify, merge,
  publish, distribute, sublicense, and/or sell copies of the Software,
  and to permit persons to whom the Software is furnished to do so,
  subject to the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
*/

#ifndef PAU_H
#define PAU_H

#include "pcp.h"
#include "svm.h"
#include "hash.h"

/*
  ASCII code of the escape (ESC) character.
*/
#define ESCAPE_CHAR       27

/*
  Sign characters, decimal point, exponent markers and decimal digits,
  i.e. the characters that can make up a textual number.
  NOTE(review): presumably used to validate numeric input -- confirm
  against the users of this macro.
*/
#define DIGITS            "+-.eE0123456789"

/*
  Max. length of numeric input in PCP.
*/
#define PCP_MAX_NUM       30

/*
  Initial number of models in .svm file.
*/
#ifndef INIT_NO_MODELS
#define INIT_NO_MODELS               20
#endif

/*
  A single PCP status/error entry: an internal code, the exit code it
  translates to, and the message for that status (see the comment on
  create_status_table()).
*/
struct pcp_stat
{
  int  internal_code;   /* PCP-internal status/error code */
  int  exit_code;       /* internal code translated into a legal exit status */
  char *msg;            /* message corresponding to the status */
};

void pcp_stat_set(struct pcp_stat *pcp_status, int internal_code, int exit_code, char *msg);

struct pcp_stat *hashPutStat(hash_t *hashtable, int key, struct pcp_stat *value);

struct pcp_stat *hashGetStat(hash_t *hashtable, int key);

/*
  Create hash table of PCP status and error codes. Each element in the
  table is a struct pcp_stat, which has internal code, exit code,
  and msg corresponding to the status. The exit code is a translation
  of internal code into legal exit status values, which have to be in
  the range [1..255]. This is because UNIX programs return 'status &
  0377' - i.e., the lower byte of the status code passed to exit() -
  to the parent process; see exit(3) for more details.

  Note that this means we only have 127 codes available for PCP status
  (since [0..128] seem to be taken, at least in Linux). If we need
  more, we are doomed. Also, this function needs to be updated every
  time a new error code is introduced.
*/
hash_t *create_status_table(void);

/*
  Log matrix.
*/
void log_mx(float **mx, int rows, int columns, FILE *fdbg);

/*
  Log columns of 'mx'.
*/
void log_mxt(float **mx, int rows, int columns, FILE *fdbg);

/*
  Calculate and display covariance matrix and matrix of linear
  correlation coefficients for data in 'xmx'.
*/
void covar(float **xmx, int d, int nv, int *errc, int dbg);

/*
  Read input data: number of features, number of classes, number of
  vectors per class, and input files.

  Return the populated 'dataset' structure. In case of error, return
  (struct dataset *) 0 and set 'errc' to error code. Error codes are
  PERR_BAD_INPUT_FILE, LERR_FILE_FORMAT, or <errno.h> error codes. In
  case of file-related errors, 'xname' has the name of the file which
  caused the error.
*/
struct dataset *pcp_input(FILE *indev, FILE *outdev, int *errc, char **xname);

/*
  Print question 'msg' on 'outdev', read integer response from
  'indev'.  The function loops until the answer is in ['min_range',
  'max_range']. 'length' is total length of question line.

  If 'dflt' is not NULL, the function returns that value when the user
  enters empty line.

  In case of error return EOF. The errors are bad arguments ('msg',
  'indev' or 'outdev' NULL) or malloc() error. In the latter case set
  'errno'.
*/
int input_integer(FILE *indev, FILE *outdev, char *msg, int length, 
		  int *dflt, int *min_range, int *max_range);

/*
  Print question 'msg' on 'outdev', read integer response from
  'indev'. The function repeats the question until the response is one
  of the values in vector 'choices' of length 'len'.

  NOTE(review): this comment used to describe a 'length' argument
  (total length of the question line), as input_integer() has, but the
  prototype below has no such parameter -- confirm how the question
  line is sized.

  If 'dflt' is not NULL, the function returns that value when the user
  enters empty line.

  In case of error return EOF. The errors are bad arguments ('msg',
  'indev' or 'outdev' NULL) or malloc() error. In the latter case set
  'errno'.
*/
int input_choice(FILE *indev, FILE *outdev, char *msg, int *dflt, int *choices, int len);

/*
  Float version of input_integer(). The input value must be in
  [min_range, max_range].
*/
float input_float(FILE *indev, FILE *outdev, char *msg, int length, 
		  float *dflt, float *min_range, float *max_range);


/*
  Get filename from stdin. 'dflt' is the default filename. The query
  message is 'txt[dflt]:' or 'txt', however at most PCP_QLEN of the
  message is printed.
*/
char *input_filename(char *txt, char *dflt, FILE *outdev);

/*
  Print 'msg' to 'outdev', centered within a string of 'length'
  characters. If 'msg' is longer than 'length', it is truncated.
*/
void center_line(FILE *outdev, char *msg, int length);

/*
  Print fixed-length message, with variable number of
  arguments. 'nargs' is the number of arguments following 'nargs'. The
  arguments are the same as for printf() and friends.
*/
void vprint_line(int nargs, ...);

/*
  Like vprint_line(), with inverse video. First argument after `nargs'
  is an integer. If it equals one, display message in inverse video,
  otherwise display in normal video.
*/
void viprint_line(int nargs, ...);

/*
  Print 'msg' to 'outdev'. Fill remainder up to 'length' with space
  characters.
*/
void print_line(FILE *outdev, char *msg, int length);

/*
  Similar to print_line(), but doesn't set video, and doesn't print
  newline.
*/
void print_ln(FILE *outdev, char *msg, int length);

void clear_screen(void);

/*
  Position cursor at (x, y). x is column, y is row. (0, 0) is upper
  left corner.
*/
void cpos(int x, int y);

void cursor_on(void);

void cursor_off(void);

void inverse_video(void);

void reset_video(void);

unsigned int input_seed(FILE *indev, FILE *outdev);

int input_nmodels(FILE *indev, FILE *outdev);

/*
  Enter dimension of transformed space for linear dimension
  reduction. The arguments have obvious meanings.
*/
int input_d(FILE *indev, FILE *outdev, int max_value, int default_value);

/*
  Enter number of features to select for feature subset selection.
*/
int input_nfeat(FILE *indev, FILE *outdev, int max_value, int default_value);

int input_nxval(FILE *indev, FILE *outdev, int max_nxval);

/*
  Read MLP parameters from stdin.
*/
int input_mlp(FILE *outdev, int inputs, int outputs, int nvec, int *mlp_continue, 
	      int *nlayers, int **npl, int *itmax, float *range, int *opt_method, 
	      float *eta, float *mu, int *nxval, int max_nxval, int *nmlp,
	      unsigned int *seed, char **fname);


/*
  Get ensemble variant of the original method (bagging or boosting).
*/
int input_ensemble_method(int method);

/*
  Input k-NN distance code.
*/
int input_knn_dist(FILE *outdev);

/*
  Get transformation (SVD, scaling) mode.

  0: use training and test sets to compute the transformation
  1: use only the training set

  The function takes no arguments. It is declared with (void), like
  the other argument-less functions in this header, so that the
  compiler checks calls against a real prototype; an empty parameter
  list '()' would leave the arguments unchecked.
*/
int input_transform_mode(void);

/*
  Read k-NN parameters from stdin.
*/
int input_knn(FILE *outdev, int *nxval, int max_nxval, int *nmlp, int *k, int maxk,
	      unsigned int *seed, char **fname, int *dist);

int input_svm_type(char *msg, FILE *outdev);

/*
  Accept kernel type from stdin.
*/
int input_kernel_type(char *msg, FILE *outdev);

/*
  Accept kernel type from a limited selection (only the types
  supported by the model selection functions pcp_svm_grid() and
  pcp_svm_simplex()).
*/
int input_kernel(char *msg, FILE *outdev);

void input_class_costs(struct svm_parameter *parameters, int c);

/*
  Read SVM parameters from stdin. Check consistency with 'problem'.

  If `full_par' is not 0, read C/nu, gamma, degree and coef0
  parameters, depending on the SVM type. Otherwise, if `ktype' is
  PCP_SVM_K_NONE, read only the kernel type. Otherwise, we don't need
  these parameter values (presumably because we are called from a
  function which will optimize them).

  If `ensemble_method' is not NULL, read the ensemble variant of
  PALG_SVM (i.e., PALG_BAG_SVM or PALG_ADABOOST_SVM).
*/
int input_svm(FILE *outdev, int c, int nvec, struct svm_problem *problem,
	      struct svm_parameter *parameters, char **fname, int *nxval, 
	      int max_nxval, int full_par, int ktype, int *ensemble_method);

/*
  Kernel types are internally coded starting from 1 (because 0 is
  reserved for the default kernel type). This function converts the
  internal kernel types to LIBSVM kernel types.
*/
int convert_kernel_type(int type);

int input_nexp(char *msg);

/*
  Read feature selection criterion. Used to evaluate feature subsets.

  Note that PCP_FSEL_GOLUB is only defined for two classes.
*/
int input_fscrit(FILE *outdev, int dr_method, int c);

/*
  Read dimension reduction parameters: method, dimension of
  transformed space, and feature subset evaluation criterion, for
  feature selection methods.

  The function returns the chosen method, and sets the dimension of
  transformed space in `idr' and the feature subset evaluation
  criterion in `fscrit'.

  The methods offered are based on number of vectors 'nv', number of
  classes 'c', and data dimension 'd'. The logic is:

                             available dimension reduction methods

  c == 2, nv <= d            none, Golub, SVD, EMAP
  c == 2, nv > d             none, Golub, PCA, FLD, EMAP
  c > 2, nv <= d             none, SVD, EMAP
  c > 2, nv > d              none, PCA, FLD, EMAP

  The 'none' option is the default.
*/
int input_dr(FILE *outdev, int nv, int d, int c, int *idr, int *fscrit);

/*
  Read feature selection method.
*/
int input_fsel(FILE *outdev);

/*
  Write prediction for vector `idx' in `dset' to 'fptr' in PCP_RCL
  format. 

  `tset' is the training dataset. It is used to extract the actual
  class name for the input vector. This is brain-dead; it is a
  consequence of the current one-class-per-file input format. This has
  to be converted to actual class labels, in which case the argument
  will become obsolete.
*/
void write_rcl(FILE *fptr, int idx, struct dataset *dset, struct dataset *tset);

/*
  Display classification results for 'test_set'. The predictions were
  obtained using 'algorithm'.

  The function employs the following logic to evaluate and display the
  predictions.

  If test_set->c > 1, we assume that the test_set class labels are the
  correct classifications of the test samples, and compute error rates
  correspondingly. The test_set file names are used as class labels.

  If test_set->c == 1, we interpret that to mean that the actual class
  labels for the test_set are not known, and therefore error rates
  cannot be calculated; hence the function just displays the
  (integer) class IDs.

  If actual class labels can be established using the above logic, and
  if 'verbose' is 1, the function displays class prediction for all
  vectors. If 'verbose' is 0, only misclassified vectors are
  displayed.

  If actual class labels are unknown (i.e., test_set->c == 1),
  'verbose' is ignored, and the function displays prediction for all
  vectors.
*/
void predict_disp(struct dataset *test_set, int verbose, int algorithm);

/*
  Extract 'subset_id'-th 'training_set' and 'test_set' from
  'x'. 'test_set' is obtained by extracting vectors listed in 'sx'
  from 'x' (see xss() for structure of 'sx'). 'training_set' has the
  remaining vectors in 'x'.
*/
int extract_sets(float **x, int subset_id, int c, int *nd, int d, 
		 int **sx, int **lx, float ***training_set, float ***test_set, 
		 FILE *fdbg);

void log_tt(FILE *fdbg, char *msg, int c, int d, int *training_nd, 
	    int *test_nd, float **training_set, float **test_set);

/*
  Log cross-validation subset 'idx'.
*/
void log_ites(FILE *fdbg, int idx, int c, int nv, int **lxc, int **sxc);

/*
  Return pseudo-random number in [0, 1].
*/
float float_rand(void);

/*
  Calculate and display eigenvalues and condition number of covariance
  matrices of training data set.
*/
void p_eigen(int *errc);

/*
  Calculate inverse covariance matrices and determinants. In case of
  success, set 'errc' to 0, otherwise set to error code.  Possible
  errors are <errno.h> codes for malloc()/calloc() functions, and
  LERR_SINGCOV (singular covariance matrix).
*/
void p_sigma(int *errc);

void xlearn_disp(int *errc, char **xname, char *default_name);

/*
  Print message in the designated area. The function has variable
  number of arguments. 'nargs' is the number of arguments following
  'nargs'. The following arguments are int (message code) and, if
  nargs is larger than 1, (char *).  The second argument is name
  (usually of a file causing error), if message code is non-zero. If
  message code is zero, the second argument is printed as is.

  TBD: change name. The name is bad since not all messages are
  necessarily error messages.
*/
void errmsg(int nargs, ...);

/*
  Create a temporary file in the current directory and return the
  name. The file is created with permissions -rw-r--r--.

  Return (char *) 0 in case of error and set errno. The errors are
  malloc(), mkstemp() and fchmod() errors.
*/
char *tempfile(void);

/*
  Set 'dset1' and 'dset2' to the deep or shallow copies of TEDS and
  TDS, respectively, depending on user input. If the copies are deep,
  the current data sets will get replaced by the result of the
  subsequent operations; if the copies are shallow, new data sets will
  be created and saved to disk. 'iffix' is used to define the file
  names for the saved data sets.

  Return the mode (replace data sets -> 1, do not replace -> 0).
*/
int input_replace(struct dataset **dset1, struct dataset **dset2, char *iffix);

/*
  Utility function for feature extraction and selection. Saves
  'dset1', 'dset2'. If 'mode' is P_REPLACE, updates the status file.

  In case of error, return -1 and set 'errc'. In case of file error,
  set file name in 'xname'.
*/
int save_datasets(struct dataset *dset1, struct dataset *dset2, int mode, 
		  int *errc, char **xname);

/*
  Save the clusters defined by 'ncl' (number of clusters), 'ccard'
  (cluster cardinalities), 'clist' (list of vectors in the clusters),
  using file 'format'.

  If 'format' is Named Vector format (DATASET_FF_VECTOR), and TDS is
  in the Raw format (DATASET_FF_RAW), we store vector IDs instead of
  the (non-existent) vector names.

  In case of error, set 'errc' and set file name in 'xname' (the only
  possible errors are file errors).
*/
void save_clusters(int ncl, int *ccard, int *clist, int format, int *errc, char **xname);

/*
  Read dataset file format from 'indev'.
*/
int input_format(FILE *indev, FILE *outdev);

/*
  A convenient memory allocator for simplex()
  optimization. Initializes the criterion vector `fval' and simplex
  `smx'.
*/
int smplx_alloc(float **fval, float ***smx, int n);

/*
  Accept input parameters and pass them to the corresponding ensemble
  (bagging or boosting) learning function.  

  In case of error, set 'errc'. If error is file access error, set
  'xname'.
*/
void pcp_ensemble(int method, int *errc, char **xname, int dbg);

/*
  Combine matrices `x1' and `x2' with `n1' and `n2' rows,
  respectively. The combination is an array of pointers pointing to
  rows of x1 followed by rows of x2.
  
  In case of malloc() failure, return (float **) 0 and set errno.
*/
float **combine_x(float **x1, int n1, float **x2, int n2);

#endif


/* syntax highlighted by Code2HTML, v. 0.9.1 */