/*
  File name: pcp.h
  Created by: Ljubomir Buturovic
  Created: 08/19/2001
  Purpose: C declarations for PCP.
*/

/*
  Copyright 2004 Ljubomir J. Buturovic

  Permission is hereby granted, free of charge, to any person
  obtaining a copy of this software and associated documentation files
  (the "Software"), to deal in the Software without restriction,
  including without limitation the rights to use, copy, modify, merge,
  publish, distribute, sublicense, and/or sell copies of the Software,
  and to permit persons to whom the Software is furnished to do so,
  subject to the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
*/

#ifndef PCP_H
#define PCP_H


#include <stdio.h>
#include "dataset.h"

/*
  Global state shared by the PCP modules.

  NOTE(review): these are definitions without 'extern', so every
  translation unit which includes this header gets its own tentative
  definition. That links only where the toolchain merges common
  symbols (GCC 10 and later default to -fno-common and report
  multiple definitions). Consider declaring them 'extern' here and
  defining each of them once in a .c file - confirm against the build.
*/
struct dataset *teds;  /* test (vault) data set */
struct dataset *tds;   /* training data set */

int  debug;             /* 1: run in debug mode */

/*
  Miscellaneous constants and prompts.

  NOTE(review): MAX_INT_DIGITS has the same value and description as
  P_MAXINT_LEN defined further down in this header - presumably one of
  them is redundant; confirm before removing either.
*/
#define MAX_INT_DIGITS              30 /* max. number of digits in an integer */

#define PCP_PCP                     "pcp" /* the PCP keyword */

/*
  k-NN distance codes. The codes are 1..PCP_N_DIST.
*/
#define EUCLIDEAN_DIST              1 /* k-NN distance codes */
#define CITY_BLOCK_DIST             2
#define MAHALANOBIS_DIST            3
#define PCP_N_DIST                  3 /* number of available distances for k-NN */
#define PCP_VT100_COLOR             "34" /* PCP color (blue:34; red: 31) */

/*
  User prompts. A value in square brackets is presumably the default
  offered to the user.
*/
#define OUTPUT_MSG                  "Short (0) or long (1) output [0]:"
#define LD_MSG                      "Enter linear discriminant file name "
#define CLASSIFIER_MSG              "Enter the classifier file name "
#define PCA_MSG                     "Computing principal components - please wait..."

/*
  Error and status codes. When introducing a new error/status code:

  - define the new code below
  - define the corresponding message
  - update create_status_table()
  - update errmsg()
*/

/*
  Undefined data sets: training (TDS), test (TEDS), or - judging by
  the text of PMSG_UNDEFINED - both.

  Note: the error codes start at 10000; the value 10003 is not
  assigned in this header.
*/
#define PERR_UNDEFINED_TDS           10000
#define PERR_UNDEFINED_TEDS          10001
#define PERR_UNDEFINED               10002

/*
  Operation only supported for two classes.
*/
#define PERR_TWO_CLASS               10004

/*
  At least two classes required for the operation.
*/
#define PERR_ONE_CLASS               10005

/*
  Each training class must have at least two samples.
*/
#define PERR_ONE_SAMPLE              10006

/*
  Bad keyboard input data. Example: the user enters a float where an
  integer is required.
*/
#define PERR_BAD_INPUT               10007

/*
  Incompatible dimensions (for example, for matrix multiplication).
*/
#define PERR_INC_DIM                 10008

/*
  Inconsistency between MLP and dataset.
*/
#define PERR_INCONSISTENT_MLP        10009

/*
  Unrecognized MLP file format.
*/
#define PERR_UNRECOGNIZED_MLP        10010

/*
  SVM model not recognized.
*/
#define PERR_UNRECOGNIZED_SVM        10011

/*
  SVD feature extraction error: data dimension is less than the number
  of data points.
*/
#define PERR_INCONSISTENT_SVD        10012

/*
  Feature extraction error: data dimension is greater than the number
  of data points.
*/
#define PERR_INCONSISTENT_FE         10013

/*
  Data set inconsistent with the given mapping file.
*/
#define PERR_INCONSISTENT_MAP        10014

/*
  Bad input file format, other than the specific file format errors
  listed here. Examples: the number of columns in an input file is
  less than declared; an input file has an inconsistent number of
  rows.
*/
#define PERR_BAD_INPUT_FILE          10015

/*
  Bad feature ranking file. Typically: the ranking file lists feature
  indexes which are outside of the allowed range (1..d).
*/
#define PERR_ILLEGAL_RNK_FILE        10016

/*
  Data file inconsistent with PCP_STS (for example, a data file has
  changed number of features, or reduced number of vectors, between
  two PCP sessions).
*/
#define PERR_INCONSISTENT_FILE       10017

/*
  Status codes (as opposed to the PERR_ error codes). Judging by the
  names, the corresponding messages are PMSG_DONE and PMSG_INVCOV.
*/
#define PSTS_DONE                    1000
#define PSTS_INVCOV                  1001

/*
  Assumed max. value of system (errno) error codes. All PCP status and
  error codes defined in this header are greater than this value.
*/
#define PMAX_ERRNO                    127

/*
  Error/status messages.

  By naming convention, PMSG_<X> is the text for the PERR_<X> or
  PSTS_<X> code. PMSG_TOO_LARGE, PMSG_DR and PMSG_EIGEN_ANALYSIS have
  no matching code in this header. PMSG_EACCES and PMSG_ENOENT
  presumably correspond to the errno values of the same name.

  Messages ending in "file" or "file " are presumably followed by a
  file name when displayed - confirm in errmsg().
*/
#define PMSG_DONE                    "Done."
#define PMSG_EACCES                  "Permission denied, file"
#define PMSG_FILE_ACCESS             "Permission denied"
#define PMSG_ENOENT                  "No such file or directory, file"
#define PMSG_INCONSISTENT_FILE       "Status file pcp.sts is inconsistent with input file"
#define PMSG_BAD_INPUT_FILE          "Bad input file"
#define PMSG_INCONSISTENT_MAP        "The dataset is inconsistent with the linear mapping."
#define PMSG_INCONSISTENT_MLP        "The dataset is inconsistent with the MLP file"
#define PMSG_INCONSISTENT_SVD        "The dimension of the data must be greater than the number of data points."
#define PMSG_INCONSISTENT_FE         "Data dimension must be less or equal the number of data points."
#define PMSG_UNDEFINED_TDS           "The training dataset is not defined."
#define PMSG_UNDEFINED_TEDS          "The test dataset is not defined."
#define PMSG_UNDEFINED               "The datasets are not defined."
#define PMSG_UNRECOGNIZED_ERROR      "Unrecognized error code "
#define PMSG_TOO_LARGE               "The dataset is too large."
#define PMSG_DR                      "Dimension reduction error."
#define PMSG_EIGEN_ANALYSIS          "Performing eigenanalyses of covariance matrices - please wait..."
#define PMSG_INVCOV                  "Inverting covariance matrices..."
#define PMSG_UNRECOGNIZED_MLP        "Unrecognized MLP file format, file "
#define PMSG_UNRECOGNIZED_SVM        "Unrecognized SVM file format, file "
#define PMSG_ILLEGAL_RNK_FILE        "The dataset is inconsistent with the feature rank file"
#define PMSG_BAD_INPUT               "Bad keyboard input."
#define PMSG_TWO_CLASS               "This operation only available for two classes."
#define PMSG_ONE_CLASS               "At least two classes must be defined."
#define PMSG_ONE_SAMPLE              "Each training class must have at least two samples."

/* Prompts for linear transformation file names. */
#define PMSG_LIN_OUTPUT_FNAME        "Enter linear transformation output file name "
#define PMSG_LIN_INPUT_FNAME         "Enter transformation matrix file name "

/*
  LMSG_ messages. No matching codes are defined in this header;
  presumably they belong to LERR_ codes declared elsewhere (LERR_ITMAX
  is mentioned in the description of svd_transform()) - confirm.
*/
#define LMSG_FILE_FORMAT             "Unrecognized data file format."
#define LMSG_DESCENT                 "Warning: optimization procedure unable to find descent direction."
#define LMSG_LNSEARCH                "Warning: line search failure in optimization procedure."
#define LMSG_SINGCOV                 "Singular covariance matrix."
#define LMSG_SINGULAR                "Attempted inversion of singular matrix."
#define LMSG_INTERNAL                "Internal software error."
#define LMSG_INCONSISTENT_LIN        "The dataset is inconsistent with the linear classifier."
#define LMSG_INCONSISTENT_MODEL      "The dataset is inconsistent with the model."
#define LMSG_ITMAX                   "Reached iteration limit in an iterative procedure."
#define LMSG_VAR_NCOL                "Variable number of columns in an input file."

/*
  Size limits.

  NOTE(review): P_MAXINT_LEN has the same value and description as
  MAX_INT_DIGITS defined near the top of this header - presumably one
  of them is redundant; confirm before removing either.
*/
#define P_MAXINT_LEN                30 /* max. integer length (number of digits) */
#define P_MAX_LOGD                  20 /* max. logging dimension */
#define P_MAX_LINE_LEN            1000 /* max. command input line length */

/*
  Menu codes, one per PCP menu. The values are consecutive (1..15).
*/
#define PCP_MENU_MAIN              1
#define PCP_MENU_LPAR              2
#define PCP_MENU_QPAR              3
#define PCP_MENU_LOAD              4
#define PCP_MENU_FEXTR             5
#define PCP_MENU_PCA               6
#define PCP_MENU_PC                7
#define PCP_MENU_MLP               8
#define PCP_MENU_BAYES             9
#define PCP_MENU_SVM               10
#define PCP_MENU_PAC               11
#define PCP_MENU_LD                12
#define PCP_MENU_KNN               13
#define PCP_MENU_FSEL              14
#define PCP_MENU_XPAR              15

/*
  PCP menu messages (displayed at bottom of page). The letter in the
  macro name is the last key of the range named in the message, i.e.
  PCP_MMSG_D is the message for a menu with the choices a..d.
*/
#define PCP_MMSG_B                 "Press a..b."
#define PCP_MMSG_C                 "Press a..c."
#define PCP_MMSG_D                 "Press a..d."
#define PCP_MMSG_E                 "Press a..e."
#define PCP_MMSG_F                 "Press a..f."
#define PCP_MMSG_G                 "Press a..g."
#define PCP_MMSG_H                 "Press a..h."

/*
  PCP user messages (prompts and notices).

  Messages which contain '%' conversions are printf-style format
  strings: the number and types of the arguments supplied by the
  caller must match the conversions in the message (%d - int, %f and
  %g - double, %s - string). A value in square brackets at the end of
  a prompt is presumably the default offered to the user.
*/
#define PCP_UMSG_RAW               "Use raw (0) or normalized data (1) [0]:"
#define PCP_UMSG_FSEL_1     	   "Use Euclidean (%d), Pearson (%d), Golub (%d), 1-NN (%d) or Bayes (%d) criterion [%d]:"
#define PCP_UMSG_FSEL_2            "Use Euclidean (%d), Pearson (%d), 1-NN (%d) or Bayes (%d) criterion [%d]:"
#define PCP_UMSG_FSEL_3            "Use 1-NN (%d), inter-intra distance (%d) or Bayes criterion (%d) [%d]:"
#define PCP_UMSG_CHOICE            "Please enter one of the choices offered."
#define PCP_UMSG_FEATSEL           "Enter feature selection method: feature ranking (%d), forward selection (%d), or backward elimination (%d) [%d]:"
#define PCP_UMSG_NFEAT             "Enter number of features to select (1..%d) [%d]:"
#define PCP_UMSG_DR_SM             "Choose dim. reduction method: none (%d), SVD (%d), EMAP (%d), feature ranking (%d), forward selection (%d), or backward elimination (%d) [%d]:"
#define PCP_UMSG_DR_LG             "Choose dim. reduction method: none (%d), FLD (%d), PCA (%d), EMAP (%d), feature ranking (%d), forward selection (%d), or backward elimination (%d) [%d]:"
#define PCP_UMSG_KERNEL_TYPE       "Enter kernel type (%1d: linear; %1d: polynomial; %1d: RBF; %1d: sigmoid) [%1d]:"
#define PCP_UMSG_KERNEL            "Enter kernel type (%1d: linear; %1d: RBF) [%1d]:"
#define PCP_UMSG_SVM_TYPE          "Enter SVM type (%1d: NU-SVM; %1d: C-SVM) [%1d]:"
#define PCP_UMSG_COST              "Enter cost parameter C [%5.2f]:"
#define PCP_UMSG_NU                "Enter nu (max. allowed frac. of training errors)"
#define PCP_UMSG_DEGREE            "Enter degree of the polynomial [%5.2f]:"
#define PCP_UMSG_GAMMA             "Enter gamma [%12.6f]:"
#define PCP_UMSG_COEF0             "Enter coef0 in kernel function [%5.2f]:"
#define PCP_UMSG_BAD_SVM_PARAMS    "Bad parameters, please re-enter: %s"
#define PCP_UMSG_XSV               "Enter SVM cross-validation file name "
#define PCP_UMSG_MODEL             "Enter output model file name "
#define PCP_UMSG_CCOSTS            "Change class costs (1: yes; 0: no) [0]:" 
#define PCP_UMSG_CLASS_COST        "Enter class %5d cost [%5.2f]:"
#define PCP_UMSG_SEED              "Enter seed for pseudo-random number generator [%5d]:"
#define PCP_UMSG_MAXIT             "Enter maximum number of iterations [%d]:"
#define PCP_UMSG_NMAXIT            "Enter number of iterations (-1 for no iterations) [%d]:"
#define PCP_UMSG_NCLASSES          "Enter number of classes:"
#define PCP_UMSG_CLASS_FNAME       "Enter file name for class"
#define PCP_UMSG_NEXP              "Enter number of experiments [%d]:"
#define PCP_UMSG_XFNAME            "Enter cross-validation file name "
#define PCP_UMSG_NBOOST            "Enter number of boosting models (0 for error-based number of models) [%5d]:"
#define PCP_UMSG_INIT_VERTEX       "Enter name of file with initial vertex:"
#define PCP_UMSG_SVM_FNAME         "Enter SVM model file name "
#define PCP_UMSG_DIM               "Enter dimension of transformed space (%d..%d):"
#define PCP_UMSG_KMIN              "Enter kmin [1]:"
#define PCP_UMSG_KMAX              "Enter kmax (%d..%d) [%5d]: "
#define PCP_UMSG_FILENAME          "Enter file name "
#define PCP_UMSG_FNAME_RNK         "Enter feature rank output file name "
#define PCP_UMSG_FNAME_RNK_2       "Enter feature ranking file name "
#define PCP_UMSG_FNAME_SEL         "Enter feature subset output file name "
#define PCP_UMSG_NSEL              "Enter number of features to select [%d]:"
#define PCP_UMSG_FNAME_MLP         "Enter MLP file name "
#define PCP_UMSG_XMP_FNAME         "Enter MLP cross-validation file name "
#define PCP_UMSG_DTHD              "Enter decision threshold [0.0]:"
#define PCP_UMSG_PAC_FNAME         "Enter parametric quadratic classifier file name "
#define PCP_UMSG_NCOMB             "Enter number of classifiers to combine (>= 1) [%5d]:"
#define PCP_UMSG_NDR               "Enter dimension of transformed space: (1..%d) [%5d]:"
#define PCP_UMSG_NDIM              "Enter dimension of transformed space (%d..%d) [%d]:"
#define PCP_UMSG_NXVAL             "Enter number of cross-validation subsets (%d..%d) [%5d]:"
#define PCP_UMSG_NHIDDEN           "Enter number of hidden layers (>= 1) [1]:"
#define PCP_UMSG_NNODES            "Enter number of nodes in hidden layer %5d:"
#define PCP_UMSG_INIT_WEIGHTS      "Enter amplitude of initial weights [%5.2f]:"
#define PCP_UMSG_OPT_METHOD        "Enter optimization method - conj. grad (%d), grad. descent (%d) [%5d]:"
#define PCP_UMSG_LRATE             "Enter learning rate [%5.2g]:"
#define PCP_UMSG_MOMENTUM          "Enter momentum term [%5.2g]:"
#define PCP_UMSG_NMLP              "Enter number of MLPs to combine (>= 1) [%5d]:"
#define PCP_UMSG_NN                "Enter number of nearest neighbors (1..%d) [%5d]:"
#define PCP_UMSG_NN1               "Enter number of nearest neighbors (>= 1) [%5d]:"
#define PCP_UMSG_XNN               "Enter k-NN cross-validation file name "
#define PCP_UMSG_ONAME             "Enter output file name "
#define PCP_UMSG_KSUB              "Enter number of k-NN subsets to combine (>= 1) [%5d]:"
#define PCP_UMSG_KDIST  	   "Use Euclidean (%d), city-block (%d) or Mahalanobis distance (%d) [%d]:"
#define PCP_UMSG_LD_FNAME          "Enter linear discriminant model file name "
#define PCP_UMSG_PLC_FNAME         "Enter parametric linear classifier file name "
#define PCP_UMSG_EMAP_S1           "Starting point: SVD (%d), random (%d), or file (%d) [%d]:"
#define PCP_UMSG_EMAP_S2           "Starting point: Fisher (%d), PCA (%d), random (%d), or file (%d) [%d]:"
#define PCP_UMSG_EMAP_S3           "Starting point: PCA (%d), random (%d), or file (%d) [%d]:"
#define PCP_UMSG_INDET             "Use indeterminate region: yes (%d) or no (%d) [%d]:"
#define PCP_UMSG_REPLACE           "Replace current dataset(s) (%d) or not (%d) [%d]:"
#define PCP_UMSG_HFORMAT           "File format: header line (0: no; 1: yes) [0]:"
#define PCP_UMSG_RFORMAT           "File format: named rows (0: no; 1: yes) [0]:"
#define PCP_UMSG_LOAD              "Load training (%d) or test (%d) dataset [%d]:"
#define PCP_UMSG_MLP_MSEL1         "Enter starting number of nodes (>= 1) [%d]:"
#define PCP_UMSG_MLP_MSEL2         "Enter ending number of nodes (>= %d) [%d]:"
#define PCP_UMSG_MLP_MSEL3         "Enter step (>= %d) [%d]:"
#define PCP_UMSG_MLP_MSEL4         "Enter starting number of iterations (>= %d) [%d]:"
#define PCP_UMSG_MLP_MSEL5         "Enter ending number of iterations (>= %d) [%d]:"
#define PCP_UMSG_KNN_MSEL1         "Enter starting number of nearest neighbors (1..%d) [%d]:"
#define PCP_UMSG_KNN_MSEL2         "Enter ending number of nearest neighbors (%d..%d) [%d]:"
#define PCP_UMSG_KNN_MSEL3         "Enter step (>= 1) [%d]:"
#define PCP_UMSG_ENSEMBLE          "Choose bagging (%d) or Adaboost (%d) [%d]: "
#define PCP_UMSG_SVD_MODE          "Use training dataset (%d) or both (%d) [%d]: "
#define PCP_UMSG_SVD               "Calculating SVD transform - please wait..."

/*
  SVM kernel types. The LIBSVM kernel types cannot be used because one
  of the types is 0, which is reserved for the default kernel type.

  PCP_SVM_K_NONE is parenthesized so that the negative constant is
  always a single operand, whatever expression it is expanded in.
*/
#define PCP_SVM_K_NONE             (-1)
#define PCP_SVM_K_LINEAR           1
#define PCP_SVM_K_POLY             2
#define PCP_SVM_K_RBF              3
#define PCP_SVM_K_SIGMOID          4

/*
  SVM parameter defaults.

  C, NU, GAMMA, DEGREE and COEF0 are the defaults for the parameters
  prompted for by PCP_UMSG_COST, PCP_UMSG_NU, PCP_UMSG_GAMMA,
  PCP_UMSG_DEGREE and PCP_UMSG_COEF0. CACHE, EPS_C, EPS_NU and PROB
  are presumably the LIBSVM cache size, stopping tolerances (C-SVM and
  NU-SVM) and probability-estimate flag - confirm against the SVM
  code.

  NOTE(review): PCP_SVM_DFLT_C and PCP_SVM_DFLT_DEGREE are integer
  constants, while the corresponding prompts use %5.2f. If they are
  passed directly to a printf-family function with those prompts, the
  argument must be converted to double first - verify at the call
  sites.
*/
#define PCP_SVM_DFLT_CACHE         40
#define PCP_SVM_DFLT_C             1000
#define PCP_SVM_DFLT_NU            0.2
#define PCP_SVM_DFLT_GAMMA         0.01
#define PCP_SVM_DFLT_DEGREE        3
#define PCP_SVM_DFLT_COEF0         0.0
#define PCP_SVM_DFLT_EPS_C         0.001
#define PCP_SVM_DFLT_EPS_NU        0.00001
#define PCP_SVM_DFLT_PROB          0

/*
  MLP model selection defaults: the starting point and the step factor
  for the two quantities which are varied, the number of nodes and the
  number of iterations (presumably the defaults offered by the
  PCP_UMSG_MLP_MSEL prompts - confirm).
*/
#define PCP_MLP_DFLT_NHL           5   /* min. number of nodes */
#define PCP_MLP_DFLT_HSTP          5   /* number of nodes, step factor */
#define PCP_MLP_DFLT_MIN           100 /* min. number of iterations */
#define PCP_MLP_DFLT_ISTP          5   /* number of iterations, step factor */

/*
  k-NN model selection defaults: the range of the number of nearest
  neighbors (KMIN..KMAX) and the increment used to step through it.
*/
#define PCP_KNN_DFLT_KMIN          1   /* min. number of neighbors */
#define PCP_KNN_DFLT_KSTEP         1   /* number of neighbors, increment factor */
#define PCP_KNN_DFLT_KMAX          20  /* max. number of neighbors */

/*
  Default values. PCP_DFLT_NEXP and PCP_DFLT_NITER are presumably the
  default number of experiments and of iterations (cf. PCP_UMSG_NEXP,
  PCP_UMSG_MAXIT) - confirm at the call sites.
*/
#define PCP_DFLT_NEXP              1
#define PCP_DFLT_NITER             20
#define PCP_DFLT_NFSEL             2   /* number of features for subset selection */

/*
  Standard file names.

  File               Description

  PCP_STS            PCP status
  PCP_DBG            debug file name
  PCP_DAT            (no description available)
  PCP_ERR            PCP error messages

  PCP_MLP            MLP output
  PCP_XMP            MLP cross-validation results
  PCP_RCL            MLP results
  PCP_KNN            k-NN results
  PCP_XNN            k-NN cross-validation results
  PCP_SVM            SVM model
  PCP_XSV            SVM cross-validation results
  PCP_LIN            linear discriminant model; also the default name
                     for linear transformation matrix
  PCP_XLD            linear-discriminant cross-validation results
  PCP_BAG            temporary model file for bagging cross-validation
  PCP_ADA            temporary model file for Adaboost cross-validation
  PCP_XTR            training sets for cross-validation
  PCP_XTE            test sets for cross-validation
  PCP_LVM            TDS in LIBSVM-compatible file format
  PCP_RNK            feature rank file
  PCP_SET            feature subset file
  PCP_PLC            parametric linear classifier
  PCP_PPC            parametric pairwise classifier
  PCP_PQC            parametric quadratic classifier
  PCP_XPL            cross-validation results for parametric linear classifier
  PCP_XPQ            (no description available; presumably the
                     cross-validation results for parametric quadratic
                     classifier - confirm)
  PCP_EMP            EMAP transformation file name
  PCP_BEE            (no description available)
  PCP_XSM            cross-validation summary file
  PCP_MSL            (no description available)
  PCP_XSL            file with cross-validation feature subsets
  PCP_XSF            like PCP_XSL, transposed

  PCP_CLX            suffix for cluster files

  Each name is defined exactly once (PCP_LIN serves both purposes
  listed above).
*/

#define PCP_STS                    "pcp.sts"
#define PCP_DBG                    "pcp.dbg"
#define PCP_DAT                    "pcp.dat"
#define PCP_ERR                    "pcp.err"

#define PCP_MLP                    "pcp.mlp"
#define PCP_RCL                    "pcp.rcl"
#define PCP_KNN                    "pcp.knn"
#define PCP_XMP                    "pcp.xmp"
#define PCP_XSV                    "pcp.xsv"
#define PCP_XLD                    "pcp.xld"
#define PCP_XNN                    "pcp.xnn"
#define PCP_BAG                    "pcp.bag"
#define PCP_ADA                    "pcp.ada"
#define PCP_SVM                    "pcp.svm"
#define PCP_LIN                    "pcp.lin"
#define PCP_XTR                    "pcp.xtr"
#define PCP_XTE                    "pcp.xte"
#define PCP_LVM                    "pcp.lvm"
#define PCP_RNK                    "pcp.rnk"
#define PCP_SET                    "pcp.set"
#define PCP_PLC                    "pcp.plc"
#define PCP_PPC                    "pcp.ppc"
#define PCP_PQC                    "pcp.pqc"
#define PCP_XPL                    "pcp.xpl"
#define PCP_XPQ                    "pcp.xpq"
#define PCP_EMP                    "pcp.emp"
#define PCP_BEE                    "pcp.bee"
#define PCP_XSM                    "pcp.xsm"
#define PCP_MSL                    "pcp.msl"
#define PCP_XSL                    "pcp.xsl"
#define PCP_XSF                    "pcp.xsf"

#define PCP_CLX                    ".clx"

/*
  Standard filename length in PCP.
*/
#define PCP_FLEN                   30

/*
  In certain display functions, up to PCP_DISP_FLEN characters of a
  filename can be displayed.
*/
#define PCP_DISP_FLEN                30

/*
  The standard length of display line in PCP.
*/
#define PCP_QLEN                     78

/*
  The longest allowed user message in PCP.
*/
#define PCP_LONG_LEN                 1000

/*
  The length of the long display line.
*/
#define PCP_LQLEN                    100

/*
  Display macros - each PCP_QLEN wide, followed by a newline:
  horizontal border lines (PCP_LINE and PCP_XLINE have identical
  text) and an empty line with the left and right border only.
*/
#define PCP_LINE        "+----------------------------------------------------------------------------+\n"
#define PCP_XLINE       "+----------------------------------------------------------------------------+\n"
#define PCP_EMPTY_LINE  "|                                                                            |\n"

/*
  Wide versions of PCP_QLEN, PCP_LINE, PCP_EMPTY_LINE: the wide
  display line is PCP_WLEN characters long.
*/

#define PCP_WLEN        100
#define PCP_WLINE       "+--------------------------------------------------------------------------------------------------+\n"
#define PCP_EMPTY_WLINE "|                                                                                                  |\n"

/*
  Number of info columns in certain output files (.mlp, .xmp, ...).
  NOTE(review): the full list of affected file types is not given
  here - check the functions which write those files.
*/
#define PCP_HFT                    40

/*
  Unassigned class ID/name.

  The ID is parenthesized so that the negative constant is always a
  single operand, whatever expression it is expanded in.
*/
#define PCP_UNASSIGNED             (-1)
#define PCP_UNASSIGNED_NAME        "unassigned"

/*
  Classification algorithm codes.

  The codes without BAG_/ADABOOST_ in the name denote the base
  classifiers. The BAG_ and ADABOOST_ variants presumably denote
  bagging and Adaboost ensembles of the corresponding base classifier
  (cf. PCP_UMSG_ENSEMBLE) - confirm against the learning functions.
*/
#define PALG_MLP                     1      /* MLP */
#define PALG_BAG_MLP                 2
#define PALG_SVM                     3      /* SVM */
#define PALG_ADABOOST_MLP            4
#define PALG_ADABOOST_SVM            5
#define PALG_BAG_SVM                 6      
#define PALG_LIN                     7      /* linear discriminant */
#define PALG_BAG_LIN                 8
#define PALG_ADABOOST_LIN            9
#define PALG_KNN                     10     /* k-NN */
#define PALG_BAG_KNN                 11
#define PALG_PLC                     12     /* parametric linear classifier */
#define PALG_BAG_PLC                 13
#define PALG_PQC                     14     /* parametric quadratic classifier */
#define PALG_BAG_PQC                 15

/*
  Dimension reduction algorithm codes.

  The codes should be self-explanatory, except for PDR_RANDOM and
  PDR_FILE. These are the 'algorithms' used to generate starting
  points for EMAP dimension reduction. PDR_RANDOM, as the name
  implies, denotes a pseudo-randomly generated feature extraction
  mapping. PDR_FILE refers to the mapping read from a file provided by
  the user.

  SVD is only available if the number of vectors 'nv' is less than or
  equal to the dimensionality of the input space 'd'; FLD (Fisher's
  linear discriminant, PDR_FISHER) and PCA are only available
  otherwise:

  condition                  available dimensionality reduction methods

  nv <= d                    all except FLD and PCA (SVD is available)
  nv > d                     all except SVD

  The descriptive names of the methods are the PDS_ strings. There
  are no PDS_ strings for PDR_PLUS_L_MINUS_R, PDR_BB, PDR_RANDOM and
  PDR_FILE in this header.
*/
#define PDR_NONE                     0
#define PDR_SVD                      1
#define PDR_FISHER                   2
#define PDR_PCA                      3
#define PDR_EMAP                     4
#define PDR_RANKING                  5
#define PDR_FORWARD                  6
#define PDR_BACKWARD                 7
#define PDR_FLOAT_FORWARD            8 
#define PDR_FLOAT_BACKWARD           9 
#define PDR_PLUS_L_MINUS_R           10
#define PDR_BB                       11
#define PDR_RANDOM                   12
#define PDR_FILE                     13

/*
  Feature selection criteria for feature ranking methods. NOTE: all
  criteria are `bigger is better'. The codes are 1..PCP_N_FSCRIT.
*/
#define  PCP_FSEL_EUCLIDEAN          1 /* Euclidean distance, used for individual feature selection */
#define  PCP_FSEL_PEARSON            2 /* Pearson corr. coefficient, used for individual feature selection */
#define  PCP_FSEL_GOLUB              3 /* specialized distance described in Golub et al., Science vol. 286, Oct 15 1999 */
#define  PCP_FSEL_KNN                4 /* 1-NN accuracy rate (100 - 1-NN error rate) */
#define  PCP_FSEL_BAYES              5 /* Bayes accuracy estimate (100 - Bayes error rate) */
#define  PCP_N_FSCRIT                5 /* the number of available feature ranking criteria */

/*
  Feature selection criteria which apply to subset selection methods.
  The codes are 1..PCP_N_SUB_FSCRIT.
*/
#define  PCP_FSUB_KNN                1 /* 1-NN accuracy rate */
#define  PCP_FSUB_IN_IN              2 /* inter-intra distance */
#define  PCP_FSUB_BAYES              3 /* Bayes accuracy estimate */
#define  PCP_N_SUB_FSCRIT            3 /* the number of available feature subset selection criteria. */

/*
  Descriptive names of the dimension reduction methods (see the PDR_
  codes).
*/
#define  PDS_NONE                    "none"
#define  PDS_SVD                     "Singular Value Decomposition"
#define  PDS_FISHER                  "Fisher's linear discriminant"
#define  PDS_PCA                     "Principal Component Analysis"
#define  PDS_RANKING                 "feature ranking"
#define  PDS_EMAP                    "EMAP"
#define  PDS_FORWARD                 "forward selection"
#define  PDS_BACKWARD                "backward elimination"
#define  PDS_FLOAT_FORWARD           "forward floating search"
#define  PDS_FLOAT_BACKWARD          "backward floating search"

/*
  Descriptive names of the feature selection criteria (see the
  PCP_FSEL_ and PCP_FSUB_ codes). PCP_SCRIT_BAYES used to read
  "Bayer error", a misspelling of "Bayes error".
*/
#define  PCP_SCRIT_EUCLIDEAN         "Euclidean"
#define  PCP_SCRIT_PEARSON           "Pearson"
#define  PCP_SCRIT_GOLUB             "Golub"
#define  PCP_SCRIT_KNN               "1-NN error rate"
#define  PCP_SCRIT_BAYES             "Bayes error"
#define  PCP_SCRIT_IN_IN             "inter-intra distance"


/*
  NOTE(review): the meaning of the two constants below is not
  documented in this header - confirm at the points of use.
*/
#define  PCP_FSEL_LPAR               10
#define  PCP_FSEL_RPAR               5

/*
  Whether to replace the existing data set or not (presumably the
  answers to the PCP_UMSG_REPLACE prompt - confirm).
*/
#define P_DO_NOT_REPLACE             0
#define P_REPLACE                    1

/*
  Data transformation types. These are the values accepted for the
  'ttype' argument of p_transform().
*/
#define P_NORMALIZE                  1
#define P_MAP                        2

/*
  Launch Bayes error estimation for TDS (the training data set).

  errc                 output: error code
  approximate_l_mode   if 1, use approximate L mode
  debug                if not 0, send debugging information to the
                       'pcp.dbg' file

  Note: the parameter 'debug' has the same name as the global 'debug'
  declared in this header. If the function definition uses the same
  parameter name, the parameter hides the global inside the function.
*/
void p_bayes(int *errc, int approximate_l_mode, int debug);

/*
  Display Bayes error estimation results.

  'errc' is the output error code. NOTE(review): 'xname' is not
  described here; in the other functions of this header it receives
  the name of the file associated with a file error - presumably the
  same applies, confirm.
*/
void p_disp_bayes(int *errc, char **xname);

/*
  Read one character from the keyboard, without echo, and store it in
  'key'.

  NOTE(review): the meaning of the int return value is not documented
  - presumably a status/error code; confirm against the
  implementation.
*/
int read_keyboard(int *key);

/*
  Wait until the user presses any key. The character is not echoed.
*/
void pwait(void);

/*
  In-place float sorting function based on an algorithm by
  R. Sedgewick. In my experience, it is about 40% faster than the C
  library function qsort(). The sedgesort() function which implements
  the algorithm is downloaded from
  http://www.yendor.com/programming/sort/. ljb, 10/14/2001

  BIG NOTE: 'vector' needs to have len+1 elements, instead of the
  usual len. The first len elements are the numbers to be sorted. The
  last element is used internally by the sedgesort() function (which
  is called by ssort()) as a 'sentinel'. This is obviously a
  restriction on the part of finsort(); it means that the user must
  allocate one more element for each vector to be sorted. If this is
  not acceptable, use fsort(), which creates a new vector with the
  extra element.
*/
void finsort(float *vector, int len);

/*
  Floating-point sorting function. The function allocates space for
  the sorted vector and returns it. The input 'vector', which has
  'len' elements, is not modified.

  In case of error, set errno. NOTE(review): the original comment
  reads "Return (float *) in case of error" without naming the value -
  presumably (float *) NULL; confirm against the implementation. The
  returned vector is presumably to be released by the caller.
*/
float *fsort(float *vector, int len);

/*
  Floating-point sorting function. hsort() uses a modified version of
  heapsort() to sort the 'm' smallest values in a vector. This makes
  sense if 'm' is considerably smaller than the length of the vector;
  otherwise, sedgesort() is faster. (The original comment calls the
  number of values 'k' in the second sentence - presumably the same
  quantity as 'm'.)

  Both the vector length 'len' and 'm' are passed by pointer.
*/
void hsort(float *vector, int *len, int *m);

/*
  In-place integer sorting function based on an algorithm by
  R. Sedgewick.

  BIG NOTE: 'vector' needs to have len+1 elements. The first len
  elements are the numbers to be sorted. The last element is used
  internally by the sedgesort() function (which is called by ssort())
  as a 'sentinel'.
*/
void intsort(int *vector, int len);

/*
  Interface to svd(), a linear transformation of high-dimensional
  vectors using Singular Value Decomposition (SVD) of a covariance
  matrix (a variant of Karhunen-Loeve transform).

  In case of error, set 'errc'. In case of file error, set the file
  name in 'xname'. 'dbg' is presumably the debug flag, as in the other
  functions of this header.

  Error codes: 1. this function assumes that the rows of 'input' are
  high-dimensional, that is typically nvec << d. If nvec >= d, set
  'errc' to PERR_INC_DIM.

  NOTE(review): elsewhere in this header SVD is described as available
  for nv <= d, and a dedicated code PERR_INCONSISTENT_SVD exists; the
  condition and the code named above may be out of date - confirm
  against the implementation.
*/
void p_svd(int *errc, char **xname, int dbg);

/*
  Feature extraction using SVD. For the input 'nv' by 'd' data matrix
  'x', returns the 'd' by 'nv' linear transformation matrix. It is
  assumed that 'nv' is <= 'd'.

  Errors (NULL is returned in all cases):

  - nv > d
  - failure to iterate in eigen(): errc is set to LERR_ITMAX
  - malloc() error: errc is set to the corresponding errno

  LERR_ITMAX is not defined in this header.
*/
float **svd_transform(float **x, int nv, int d, int *errc);

/*
  Implementation of EMAP functionality (linear feature transformation
  which minimizes the Bayes error estimate in the transformed space).
  The function computes the transformation matrix using the training
  data set, and saves it in a file.

  In case of error, set 'errc'. If 'errc' is a file access error,
  'xname' is the name of the file associated with the error. If 'dbg'
  is > 0, send debugging information to the debug file.
*/
void p_emap(int *errc, char **xname, int dbg);

/*
  Intermediate data loading function.

  NOTE(review): what 'intermediate' refers to is not explained here.
  'errc' and 'xname' presumably follow the convention described for
  init_load() (error code, name of the offending file) - confirm.
*/
void p_load(int *errc, char **xname);

/*
  Startup data load. Reads the status file, then loads the data from
  the input files into the global C data structures 'teds' and 'tds'.

  In case of success, set 'errc' to 0. Otherwise, set 'errc' to the
  error code. If 'errc' is a file-related error code, 'xname' is the
  name of the offending file.
*/
void init_load(int *errc, char **xname);

/*
  Save status of 'test_dataset', 'training_dataset' in the file
  'fname'.

  NOTE(review): the meaning of the int return value is not documented
  - presumably 0 for success and an error code otherwise; confirm
  against the implementation.
*/
int save_sts(char *fname, struct dataset *test_dataset, struct dataset *training_dataset);

/*
  Copy data set.

  NOTE(review): which data set is copied, and where, is not stated
  here. 'errc' and 'xname' presumably follow the convention used by
  the other functions of this header (error code, name of the file
  associated with a file error) - confirm.
*/
void p_copy(int *errc, char **xname);

/*
  Perform multiple cross-validation learning experiments.

  Each experiment consists of the following cross-validation test:
  train the classifier using 'method' on a pseudo-randomly chosen
  subset of the training data set, and test it on the remainder of the
  dataset; then choose another training/test partition, repeat the
  test, etc. This is repeated 'nxval' times, where 'nxval' is the
  number of cross-validation subsets.

  'nxval' is not a parameter of this function; presumably it is
  collected from the user. 'method' is presumably one of the PALG_
  codes - confirm.
*/
void p_xlearn(int method, int *errc, char **xname, int dbg);

/*
  Transform the data set(s) using the transformation 'ttype'. The
  available values for 'ttype' are P_NORMALIZE and P_MAP. Optionally
  copy the transformed data set(s) into the current data set.
*/
void p_transform(int ttype, int *errc, char **xname);

/*
  Remove the datasets (used in case of error). NOTE(review):
  presumably this refers to the globals 'tds' and 'teds' - confirm.
*/
void remove_datasets(void);

/*
  NOTE(review): undocumented. Judging by the name and by
  PCP_VT100_COLOR, presumably switches the terminal color on -
  confirm against the implementation.
*/
void con(void);

/*
  NOTE(review): undocumented. Judging by the name and by
  PCP_VT100_COLOR, presumably switches the terminal color off -
  confirm against the implementation.
*/
void coff(void);

/*
  Select the optimal feature subset in TDS. The function collects the
  parameters from the user (output file name and criterion), computes
  the optimal feature subset, and saves it in the output file.
*/
void p_fselect(int *errc, char **xname, int dbg);

/*
  Display the ranking of the features stored in a user-defined file.
*/
void p_fdisp(int *errc, char **xname, int dbg);

/*
  Extract and save a user-defined subset of features from the current
  data sets. Optionally, replace the current data sets with the
  subset.
*/
void p_f_subset(int *errc, char **xname, int dbg);


/*
  NOTE(review): undocumented; the purpose cannot be determined from
  this header. 'errc' is presumably the output error code, as in the
  other functions declared here - confirm.
*/
void eb(int *errc);

/*
  Collect the input parameters from the user, call the parametric
  linear classifier function lin_learn() on TDS, save the classifier
  and display the results of TDS classification.

  In case of error, return the error code and, in case of file error,
  store the offending file name in 'xname'.
*/
int p_lin_learn(char **xname);

/*
  Train the parametric quadratic classifier interactively: collect
  input parameters from the user, call pqc_learn() on TDS (the
  training data set), save the classifier, and display the results of
  TDS classification.

  Returns an error code in case of error. If the error is a file
  error, the offending file name is stored in 'xname'.
*/
int p_pqc_learn(char **xname);

/*
  Classify the test data set using a parametric quadratic classifier
  stored in a file. The function obtains input parameters from the
  user, calls dataset_pqc_predict(), and displays the classification
  results.

  Returns 0 on success and an error code on failure. In case of a file
  access error, the relevant file name is returned in 'xname'.
*/
int p_pqc_predict(char **xname);

/*
  Collect input parameters from the user and call lind_learn(), which
  performs the training.

  In case of failure, set 'errc'. If the error is a file access error,
  set 'xname' to the name of the file which triggered the error.

  NOTE(review): 'dbg' is passed by pointer here, unlike the
  by-value 'dbg' of p_xlearn(), p_fselect() and others; it is
  presumably the debug-mode flag -- confirm against the definition.
*/
void p_lind_learn(int *errc, char **xname, int *dbg);

/*
  Classify the test data set using the linear classifier 'method'
  stored in a file. 'method' can be PALG_LIN/PALG_BAG_LIN for the
  linear discriminant classifier, or PALG_PLC/PALG_BAG_PLC for the
  parametric linear classifier. The function obtains input parameters
  from the user, calls lin_predict(), and displays the classification
  results.

  Returns 0 on success and an error code on failure. In case of a file
  access error, the relevant file name is returned in 'xname'.
*/
int p_lin_predict(int method, char **xname);

/*
  Classify the test set using the k-NN method, then display and save
  the results.

  NOTE(review): 'errc'/'xname' presumably carry the error code and the
  offending file name as in p_knn_bagging(); 'dbg' (a pointer here) is
  presumably the debug-mode flag -- confirm against the definition.
*/
void p_knn(int *errc, char **xname, int *dbg);

/*
  Accept input parameters and pass them to the k-NN bagging function.

  In case of error, set 'errc'. If the error is a file access error,
  set 'xname'.

  NOTE(review): 'dbg' (a pointer here) is presumably the debug-mode
  flag -- confirm against the definition.
*/
void p_knn_bagging(int *errc, char **xname, int *dbg);

/*
  Accept input parameters from the user and pass them to the MLP
  learning function mlp_learn().

  In case of error, set 'errc'. If the error is a file access error,
  set 'xname'.

  NOTE(review): 'dbg' (a pointer here) is presumably the debug-mode
  flag -- confirm against the definition.
*/
void p_mlp_learn(int *errc, char **xname, int *dbg);

/*
  NOTE(review): not documented in this header. By analogy with
  p_svm_predict(), this presumably classifies the test data set using
  a stored MLP classifier, reporting the error code in 'errc' and the
  offending file name in 'xname' -- confirm against the definition.
*/
void p_mlp_predict(int *errc, char **xname);

/*
  Collect SVM learning parameters from the user, then call
  svm_train(), which performs the SVM training.

  In case of successful completion, set 'errc' to 0. In case of error,
  set 'errc' to 'errno'. If the error is a file access error, set
  'xname' (this comment previously called the parameter 'file', which
  does not exist in the signature).

  NOTE(review): 'dbg' is presumably the debug-mode flag -- confirm
  against the definition.
*/
void p_svm_learn(int *errc, char **xname, int dbg);

/*
  Classify the test data set using the SVM classifier stored in
  'svm_fname' (the file name provided by the user; not a parameter of
  this function), and display the classification results.

  In case of successful completion, set 'errc' to 0. Otherwise, set
  'errc' to 'errno'. In case of file error, store the name of the file
  causing the error in 'xname'.
*/
void p_svm_predict(int *errc, char **xname);

/*
  Save the training data set in LIBSVM data format. See the LIBSVM
  documentation for the format description.

  In case of success, set 'errc' to 0; otherwise set it to 'errno'. If
  the error is a file access error, store the name of the offending
  file in 'xname'.
*/
void p_svm_save(int *errc, char **xname);

/*
  Set the SVM parameters we decided to keep as defaults.

  NOTE(review): 'svm_parameters' is an untyped pointer; it presumably
  points to the SVM parameter structure to be filled in -- confirm the
  expected type against the definition.
*/
void svm_param_defaults(void *svm_parameters);

/*
  Compute the max. feasible value of nu for NU-SVM, using
  Proposition 3 in Section 6 of
  http://www.csie.ntu.edu.tw/~cjlin/papers/nusvmtutorial.pdf.

  Returns the computed value.

  NOTE(review): the parameters are not documented here; presumably
  'nd' holds the per-class sample counts and 'c' is the number of
  classes -- confirm against the definition.
*/
float get_svm_nu_max(int *nd, int c);

/*
  Compute the max. feasible value of nu for 'nxval' NU-SVM
  cross-validation experiments. The function actually computes a
  (tight) lower bound on nu, assuming all subsets are of the size
  (nxval-1)*(size/nxval).

  NOTE(review): 'nd' and 'c' are not documented here; presumably they
  have the same meaning as in get_svm_nu_max() -- confirm against the
  definition.
*/
float get_svm_nu_xmax(int *nd, int c, int nxval);

/*
  Optimize SVM parameters using simplex.

  NOTE(review): pcp_svm_simplex() carries an almost identical
  description; how the two differ is not stated in this header.
  'errc'/'xname' presumably carry the error code and the offending
  file name, and 'dbg' is presumably the debug-mode flag -- confirm
  against the definition.
*/
void pcp_xpar(int *errc, int dbg, char **xname);

/*
  SVM model selection using grid (i.e., exhaustive) search.

  NOTE(review): 'errc'/'xname' presumably carry the error code and the
  offending file name, and 'dbg' is presumably the debug-mode flag --
  confirm against the definition.
*/
void pcp_svm_grid(int *errc, int dbg, char **xname);

/*
  NOTE(review): not documented in this header. The name and parameters
  suggest it displays the menu identified by 'menu_id'; the meaning of
  'size' cannot be told from the declaration -- see the definition.
*/
void p_disp(int menu_id, int size);

/*
  Optimize SVM parameters using the Simplex algorithm.

  NOTE(review): 'errc'/'xname' presumably carry the error code and the
  offending file name, and 'dbg' is presumably the debug-mode flag --
  confirm against the definition.
*/
void pcp_svm_simplex(int *errc, int dbg, char **xname);

/*
  Multi-Layer Perceptron model selection. The function chooses the
  optimal number of hidden nodes and the optimal number of
  iterations. It is assumed that the MLP has one hidden layer.

  NOTE(review): 'errc'/'xname' presumably carry the error code and the
  offending file name, and 'dbg' is presumably the debug-mode flag --
  confirm against the definition.
*/
void pcp_mlp_xpar(int *errc, int dbg, char **xname);

/*
  k-NN model selection. The function chooses the optimal number of
  nearest neighbors.

  NOTE(review): 'errc'/'xname' presumably carry the error code and the
  offending file name, and 'dbg' is presumably the debug-mode flag --
  confirm against the definition.
*/
void pcp_knn_xpar(int *errc, int dbg, char **xname);

#endif 


/* syntax highlighted by Code2HTML, v. 0.9.1 */