/*
File name: pcp.h
Created by: Ljubomir Buturovic
Created: 08/19/2001
Purpose: C declarations for PCP.
*/
/*
Copyright 2004 Ljubomir J. Buturovic
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation files
(the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#ifndef PCP_H
#define PCP_H
#include <stdio.h>
#include "dataset.h"
/*
Global state shared by the PCP modules.
NOTE(review): these are definitions (no 'extern') placed in a header,
so every translation unit that includes pcp.h emits its own tentative
definition. This links only when the toolchain merges common symbols
(-fcommon); GCC 10 and later default to -fno-common, which turns this
into a multiple-definition link error. Consider declaring them
'extern' here and defining each exactly once in a single .c file.
*/
struct dataset *teds; /* test (vault) data set */
struct dataset *tds; /* training data set */
int debug; /* 1: run in debug mode */
/*
General constants, k-NN distance codes and a few user prompts.
*/
#define MAX_INT_DIGITS 30 /* max. number of digits in an integer */
#define PCP_PCP "pcp" /* the PCP keyword */
/*
k-NN distance codes. The codes are consecutive, starting at 1, and
PCP_N_DIST is their count; keep PCP_N_DIST in sync when adding a code.
*/
#define EUCLIDEAN_DIST 1 /* k-NN distance codes */
#define CITY_BLOCK_DIST 2
#define MAHALANOBIS_DIST 3
#define PCP_N_DIST 3 /* number of available distances for k-NN */
#define PCP_VT100_COLOR "34" /* PCP color (blue:34; red: 31) */
/*
User prompts. The value in square brackets is presumably the default
taken when the user enters nothing - confirm against the callers.
*/
#define OUTPUT_MSG "Short (0) or long (1) output [0]:"
#define LD_MSG "Enter linear discriminant file name "
#define CLASSIFIER_MSG "Enter the classifier file name "
#define PCA_MSG "Computing principal components - please wait..."
/*
Error and status codes. When introducing a new error/status code:
- define the new code below
- define the corresponding message
- update create_status_table()
- update errmsg()
All PERR_* and PSTS_* values defined here are greater than PMAX_ERRNO
(defined at the end of this section), presumably so that they cannot
be confused with system errno values.
*/
/*
Undefined data sets: training data set (TDS), test data set (TEDS),
or data sets in general (see the matching PMSG_UNDEFINED* messages).
*/
#define PERR_UNDEFINED_TDS 10000
#define PERR_UNDEFINED_TEDS 10001
#define PERR_UNDEFINED 10002
/*
Operation only supported for two classes.
NOTE: code 10003 is not assigned in this header.
*/
#define PERR_TWO_CLASS 10004
/*
At least two classes required for the operation.
*/
#define PERR_ONE_CLASS 10005
/*
Each training class must have at least two samples.
*/
#define PERR_ONE_SAMPLE 10006
/*
Bad keyboard input data. Example: the user enters a float where an
integer is required.
*/
#define PERR_BAD_INPUT 10007
/*
Incompatible dimensions (for example, for matrix multiplication).
*/
#define PERR_INC_DIM 10008
/*
Inconsistency between MLP and dataset.
*/
#define PERR_INCONSISTENT_MLP 10009
/*
Unrecognized MLP file format.
*/
#define PERR_UNRECOGNIZED_MLP 10010
/*
SVM model not recognized.
*/
#define PERR_UNRECOGNIZED_SVM 10011
/*
SVD feature extraction error: data dimension is less than the number
of data points.
*/
#define PERR_INCONSISTENT_SVD 10012
/*
Feature extraction error: data dimension is greater than the number
of data points.
*/
#define PERR_INCONSISTENT_FE 10013
/*
Data set inconsistent with the given mapping file.
*/
#define PERR_INCONSISTENT_MAP 10014
/*
Bad input file format other than the specific file format
errors. Examples: the number of columns in an input file is less than
declared; an input file has an inconsistent number of rows.
*/
#define PERR_BAD_INPUT_FILE 10015
/*
Bad feature ranking file. Typically: the ranking file lists feature
indexes which are outside of the allowed range (1..d).
*/
#define PERR_ILLEGAL_RNK_FILE 10016
/*
Data file inconsistent with PCP_STS (for example, a data file has
changed number of features, or reduced number of vectors, between
two PCP sessions).
*/
#define PERR_INCONSISTENT_FILE 10017
/*
Status codes. These are presumably progress notifications rather than
errors (cf. PMSG_DONE and PMSG_INVCOV) - confirm against errmsg().
*/
#define PSTS_DONE 1000
#define PSTS_INVCOV 1001
/*
Assumed max. value of system (errno) error codes.
*/
#define PMAX_ERRNO 127
/*
Error/status messages.
PMSG_* strings pair by name with the PERR_* and PSTS_* codes defined in
this header (e.g. PMSG_TWO_CLASS with PERR_TWO_CLASS). Messages ending
in ", file" or "file " are presumably completed by the caller with a
file name - confirm against errmsg().
*/
#define PMSG_DONE "Done."
#define PMSG_EACCES "Permission denied, file"
#define PMSG_FILE_ACCESS "Permission denied"
#define PMSG_ENOENT "No such file or directory, file"
#define PMSG_INCONSISTENT_FILE "Status file pcp.sts is inconsistent with input file"
#define PMSG_BAD_INPUT_FILE "Bad input file"
#define PMSG_INCONSISTENT_MAP "The dataset is inconsistent with the linear mapping."
#define PMSG_INCONSISTENT_MLP "The dataset is inconsistent with the MLP file"
#define PMSG_INCONSISTENT_SVD "The dimension of the data must be greater than the number of data points."
#define PMSG_INCONSISTENT_FE "Data dimension must be less or equal the number of data points."
#define PMSG_UNDEFINED_TDS "The training dataset is not defined."
#define PMSG_UNDEFINED_TEDS "The test dataset is not defined."
#define PMSG_UNDEFINED "The datasets are not defined."
#define PMSG_UNRECOGNIZED_ERROR "Unrecognized error code "
#define PMSG_TOO_LARGE "The dataset is too large."
#define PMSG_DR "Dimension reduction error."
#define PMSG_EIGEN_ANALYSIS "Performing eigenanalyses of covariance matrices - please wait..."
#define PMSG_INVCOV "Inverting covariance matrices..."
#define PMSG_UNRECOGNIZED_MLP "Unrecognized MLP file format, file "
#define PMSG_UNRECOGNIZED_SVM "Unrecognized SVM file format, file "
#define PMSG_ILLEGAL_RNK_FILE "The dataset is inconsistent with the feature rank file"
#define PMSG_BAD_INPUT "Bad keyboard input."
#define PMSG_TWO_CLASS "This operation only available for two classes."
#define PMSG_ONE_CLASS "At least two classes must be defined."
#define PMSG_ONE_SAMPLE "Each training class must have at least two samples."
#define PMSG_LIN_OUTPUT_FNAME "Enter linear transformation output file name "
#define PMSG_LIN_INPUT_FNAME "Enter transformation matrix file name "
/*
LMSG_* messages. The matching codes are not defined in this header;
presumably they belong to LERR_* codes declared elsewhere (LERR_ITMAX
is mentioned in the svd_transform() comment) - confirm.
*/
#define LMSG_FILE_FORMAT "Unrecognized data file format."
#define LMSG_DESCENT "Warning: optimization procedure unable to find descent direction."
#define LMSG_LNSEARCH "Warning: line search failure in optimization procedure."
#define LMSG_SINGCOV "Singular covariance matrix."
#define LMSG_SINGULAR "Attempted inversion of singular matrix."
#define LMSG_INTERNAL "Internal software error."
#define LMSG_INCONSISTENT_LIN "The dataset is inconsistent with the linear classifier."
#define LMSG_INCONSISTENT_MODEL "The dataset is inconsistent with the model."
#define LMSG_ITMAX "Reached iteration limit in an iterative procedure."
#define LMSG_VAR_NCOL "Variable number of columns in an input file."
/*
Assorted size limits.
NOTE(review): P_MAXINT_LEN has the same value and meaning as
MAX_INT_DIGITS defined earlier in this header; one of the two is
probably redundant.
*/
#define P_MAXINT_LEN 30 /* max. integer length (number of digits) */
#define P_MAX_LOGD 20 /* max. logging dimension */
#define P_MAX_LINE_LEN 1000 /* max. command input line length */
/*
Menu codes. One code per menu screen, numbered consecutively from 1.
These are presumably the values passed as 'menu_id' to p_disp() -
confirm against its implementation.
*/
#define PCP_MENU_MAIN 1
#define PCP_MENU_LPAR 2
#define PCP_MENU_QPAR 3
#define PCP_MENU_LOAD 4
#define PCP_MENU_FEXTR 5
#define PCP_MENU_PCA 6
#define PCP_MENU_PC 7
#define PCP_MENU_MLP 8
#define PCP_MENU_BAYES 9
#define PCP_MENU_SVM 10
#define PCP_MENU_PAC 11
#define PCP_MENU_LD 12
#define PCP_MENU_KNN 13
#define PCP_MENU_FSEL 14
#define PCP_MENU_XPAR 15
/*
PCP menu messages (displayed at bottom of page).
The macro suffix is the last selectable key: PCP_MMSG_D is the prompt
for a menu whose entries are labelled a..d.
*/
#define PCP_MMSG_B "Press a..b."
#define PCP_MMSG_C "Press a..c."
#define PCP_MMSG_D "Press a..d."
#define PCP_MMSG_E "Press a..e."
#define PCP_MMSG_F "Press a..f."
#define PCP_MMSG_G "Press a..g."
#define PCP_MMSG_H "Press a..h."
/*
PCP user messages.
Most of these are printf-style format strings: the caller must supply
one argument per conversion specification, of the matching type (int
for %d, double for %f and %g, char * for %s). The value shown in
square brackets is presumably the default offered to the user -
confirm against the callers. Prompts ending in a blank ("... file
name ") are presumably followed by text appended by the caller.
*/
#define PCP_UMSG_RAW "Use raw (0) or normalized data (1) [0]:"
#define PCP_UMSG_FSEL_1 "Use Euclidean (%d), Pearson (%d), Golub (%d), 1-NN (%d) or Bayes (%d) criterion [%d]:"
#define PCP_UMSG_FSEL_2 "Use Euclidean (%d), Pearson (%d), 1-NN (%d) or Bayes (%d) criterion [%d]:"
#define PCP_UMSG_FSEL_3 "Use 1-NN (%d), inter-intra distance (%d) or Bayes criterion (%d) [%d]:"
#define PCP_UMSG_CHOICE "Please enter one of the choices offered."
#define PCP_UMSG_FEATSEL "Enter feature selection method: feature ranking (%d), forward selection (%d), or backward elimination (%d) [%d]:"
#define PCP_UMSG_NFEAT "Enter number of features to select (1..%d) [%d]:"
#define PCP_UMSG_DR_SM "Choose dim. reduction method: none (%d), SVD (%d), EMAP (%d), feature ranking (%d), forward selection (%d), or backward elimination (%d) [%d]:"
#define PCP_UMSG_DR_LG "Choose dim. reduction method: none (%d), FLD (%d), PCA (%d), EMAP (%d), feature ranking (%d), forward selection (%d), or backward elimination (%d) [%d]:"
#define PCP_UMSG_KERNEL_TYPE "Enter kernel type (%1d: linear; %1d: polynomial; %1d: RBF; %1d: sigmoid) [%1d]:"
#define PCP_UMSG_KERNEL "Enter kernel type (%1d: linear; %1d: RBF) [%1d]:"
#define PCP_UMSG_SVM_TYPE "Enter SVM type (%1d: NU-SVM; %1d: C-SVM) [%1d]:"
#define PCP_UMSG_COST "Enter cost parameter C [%5.2f]:"
#define PCP_UMSG_NU "Enter nu (max. allowed frac. of training errors)"
#define PCP_UMSG_DEGREE "Enter degree of the polynomial [%5.2f]:"
#define PCP_UMSG_GAMMA "Enter gamma [%12.6f]:"
#define PCP_UMSG_COEF0 "Enter coef0 in kernel function [%5.2f]:"
#define PCP_UMSG_BAD_SVM_PARAMS "Bad parameters, please re-enter: %s"
#define PCP_UMSG_XSV "Enter SVM cross-validation file name "
#define PCP_UMSG_MODEL "Enter output model file name "
#define PCP_UMSG_CCOSTS "Change class costs (1: yes; 0: no) [0]:"
#define PCP_UMSG_CLASS_COST "Enter class %5d cost [%5.2f]:"
#define PCP_UMSG_SEED "Enter seed for pseudo-random number generator [%5d]:"
#define PCP_UMSG_MAXIT "Enter maximum number of iterations [%d]:"
#define PCP_UMSG_NMAXIT "Enter number of iterations (-1 for no iterations) [%d]:"
#define PCP_UMSG_NCLASSES "Enter number of classes:"
#define PCP_UMSG_CLASS_FNAME "Enter file name for class"
#define PCP_UMSG_NEXP "Enter number of experiments [%d]:"
#define PCP_UMSG_XFNAME "Enter cross-validation file name "
#define PCP_UMSG_NBOOST "Enter number of boosting models (0 for error-based number of models) [%5d]:"
#define PCP_UMSG_INIT_VERTEX "Enter name of file with initial vertex:"
#define PCP_UMSG_SVM_FNAME "Enter SVM model file name "
#define PCP_UMSG_DIM "Enter dimension of transformed space (%d..%d):"
#define PCP_UMSG_KMIN "Enter kmin [1]:"
#define PCP_UMSG_KMAX "Enter kmax (%d..%d) [%5d]: "
#define PCP_UMSG_FILENAME "Enter file name "
#define PCP_UMSG_FNAME_RNK "Enter feature rank output file name "
#define PCP_UMSG_FNAME_RNK_2 "Enter feature ranking file name "
#define PCP_UMSG_FNAME_SEL "Enter feature subset output file name "
#define PCP_UMSG_NSEL "Enter number of features to select [%d]:"
#define PCP_UMSG_FNAME_MLP "Enter MLP file name "
#define PCP_UMSG_XMP_FNAME "Enter MLP cross-validation file name "
#define PCP_UMSG_DTHD "Enter decision threshold [0.0]:"
#define PCP_UMSG_PAC_FNAME "Enter parametric quadratic classifier file name "
#define PCP_UMSG_NCOMB "Enter number of classifiers to combine (>= 1) [%5d]:"
#define PCP_UMSG_NDR "Enter dimension of transformed space: (1..%d) [%5d]:"
#define PCP_UMSG_NDIM "Enter dimension of transformed space (%d..%d) [%d]:"
#define PCP_UMSG_NXVAL "Enter number of cross-validation subsets (%d..%d) [%5d]:"
#define PCP_UMSG_NHIDDEN "Enter number of hidden layers (>= 1) [1]:"
#define PCP_UMSG_NNODES "Enter number of nodes in hidden layer %5d:"
#define PCP_UMSG_INIT_WEIGHTS "Enter amplitude of initial weights [%5.2f]:"
#define PCP_UMSG_OPT_METHOD "Enter optimization method - conj. grad (%d), grad. descent (%d) [%5d]:"
#define PCP_UMSG_LRATE "Enter learning rate [%5.2g]:"
#define PCP_UMSG_MOMENTUM "Enter momentum term [%5.2g]:"
#define PCP_UMSG_NMLP "Enter number of MLPs to combine (>= 1) [%5d]:"
#define PCP_UMSG_NN "Enter number of nearest neighbors (1..%d) [%5d]:"
#define PCP_UMSG_NN1 "Enter number of nearest neighbors (>= 1) [%5d]:"
#define PCP_UMSG_XNN "Enter k-NN cross-validation file name "
#define PCP_UMSG_ONAME "Enter output file name "
#define PCP_UMSG_KSUB "Enter number of k-NN subsets to combine (>= 1) [%5d]:"
#define PCP_UMSG_KDIST "Use Euclidean (%d), city-block (%d) or Mahalanobis distance (%d) [%d]:"
#define PCP_UMSG_LD_FNAME "Enter linear discriminant model file name "
#define PCP_UMSG_PLC_FNAME "Enter parametric linear classifier file name "
#define PCP_UMSG_EMAP_S1 "Starting point: SVD (%d), random (%d), or file (%d) [%d]:"
#define PCP_UMSG_EMAP_S2 "Starting point: Fisher (%d), PCA (%d), random (%d), or file (%d) [%d]:"
#define PCP_UMSG_EMAP_S3 "Starting point: PCA (%d), random (%d), or file (%d) [%d]:"
#define PCP_UMSG_INDET "Use indeterminate region: yes (%d) or no (%d) [%d]:"
#define PCP_UMSG_REPLACE "Replace current dataset(s) (%d) or not (%d) [%d]:"
#define PCP_UMSG_HFORMAT "File format: header line (0: no; 1: yes) [0]:"
#define PCP_UMSG_RFORMAT "File format: named rows (0: no; 1: yes) [0]:"
#define PCP_UMSG_LOAD "Load training (%d) or test (%d) dataset [%d]:"
#define PCP_UMSG_MLP_MSEL1 "Enter starting number of nodes (>= 1) [%d]:"
#define PCP_UMSG_MLP_MSEL2 "Enter ending number of nodes (>= %d) [%d]:"
#define PCP_UMSG_MLP_MSEL3 "Enter step (>= %d) [%d]:"
#define PCP_UMSG_MLP_MSEL4 "Enter starting number of iterations (>= %d) [%d]:"
#define PCP_UMSG_MLP_MSEL5 "Enter ending number of iterations (>= %d) [%d]:"
#define PCP_UMSG_KNN_MSEL1 "Enter starting number of nearest neighbors (1..%d) [%d]:"
#define PCP_UMSG_KNN_MSEL2 "Enter ending number of nearest neighbors (%d..%d) [%d]:"
#define PCP_UMSG_KNN_MSEL3 "Enter step (>= 1) [%d]:"
#define PCP_UMSG_ENSEMBLE "Choose bagging (%d) or Adaboost (%d) [%d]: "
#define PCP_UMSG_SVD_MODE "Use training dataset (%d) or both (%d) [%d]: "
#define PCP_UMSG_SVD "Calculating SVD transform - please wait..."
/*
SVM kernel types. The LIBSVM kernel types cannot be used because one
of the types is 0, which is reserved for the default kernel type.
PCP_SVM_K_NONE is parenthesized so that the negative value always
expands as a single operand, whatever expression surrounds it.
*/
#define PCP_SVM_K_NONE (-1)
#define PCP_SVM_K_LINEAR 1
#define PCP_SVM_K_POLY 2
#define PCP_SVM_K_RBF 3
#define PCP_SVM_K_SIGMOID 4
/*
SVM parameter defaults.
NOTE(review): the names suggest these map onto the LIBSVM training
parameters (cache size, C, nu, gamma, degree, coef0, stopping
tolerance for C-SVM and NU-SVM, probability estimates); units and
exact meaning are not visible in this header - confirm against
svm_param_defaults().
*/
#define PCP_SVM_DFLT_CACHE 40
#define PCP_SVM_DFLT_C 1000
#define PCP_SVM_DFLT_NU 0.2
#define PCP_SVM_DFLT_GAMMA 0.01
#define PCP_SVM_DFLT_DEGREE 3
#define PCP_SVM_DFLT_COEF0 0.0
#define PCP_SVM_DFLT_EPS_C 0.001
#define PCP_SVM_DFLT_EPS_NU 0.00001
#define PCP_SVM_DFLT_PROB 0
/*
MLP model selection defaults (see pcp_mlp_xpar()).
The two step values are described as step *factors*; whether they are
applied multiplicatively or additively is not visible here.
*/
#define PCP_MLP_DFLT_NHL 5 /* min. number of nodes */
#define PCP_MLP_DFLT_HSTP 5 /* number of nodes, step factor */
#define PCP_MLP_DFLT_MIN 100 /* min. number of iterations */
#define PCP_MLP_DFLT_ISTP 5 /* number of iterations, step factor */
/*
k-NN model selection defaults (see pcp_knn_xpar()).
*/
#define PCP_KNN_DFLT_KMIN 1 /* min. number of neighbors */
#define PCP_KNN_DFLT_KSTEP 1 /* number of neighbors, increment factor */
#define PCP_KNN_DFLT_KMAX 20 /* max. number of neighbors */
/*
Default values.
*/
#define PCP_DFLT_NEXP 1 /* presumably default number of experiments (cf. PCP_UMSG_NEXP) */
#define PCP_DFLT_NITER 20 /* presumably default number of iterations */
#define PCP_DFLT_NFSEL 2 /* number of features for subset selection */
/*
Standard file names.
File      Description
PCP_STS   PCP status
PCP_DBG   debug file name
PCP_DAT   (no description given)
PCP_ERR   PCP error messages
PCP_MLP   MLP output
PCP_XMP   MLP cross-validation results
PCP_RCL   MLP results
PCP_KNN   k-NN results
PCP_XNN   k-NN cross-validation results
PCP_SVM   SVM model
PCP_XSV   SVM cross-validation results
PCP_LIN   linear discriminant model; also the default name for the
          linear transformation matrix
PCP_XLD   linear-discriminant cross-validation results
PCP_BAG   temporary model file for bagging cross-validation
PCP_ADA   temporary model file for Adaboost cross-validation
PCP_XTR   training sets for cross-validation
PCP_XTE   test sets for cross-validation
PCP_LVM   TDS in LIBSVM-compatible file format
PCP_RNK   feature rank file
PCP_SET   feature subset file
PCP_PLC   parametric linear classifier
PCP_PPC   parametric pairwise classifier
PCP_PQC   parametric quadratic classifier
PCP_XPL   cross-validation results for parametric linear classifier
PCP_XPQ   NOTE(review): undocumented; presumably cross-validation
          results for parametric quadratic classifier (cf. PCP_XPL)
PCP_EMP   EMAP transformation file name
PCP_BEE   NOTE(review): undocumented
PCP_XSM   cross-validation summary file
PCP_MSL   NOTE(review): undocumented
PCP_XSL   file with cross-validation feature subsets
PCP_XSF   like PCP_XSL, transposed
PCP_CLX   suffix for cluster files
PCP_LIN serves two purposes (see the table) but is one file name, so
it is defined only once.
*/
#define PCP_STS "pcp.sts"
#define PCP_DBG "pcp.dbg"
#define PCP_DAT "pcp.dat"
#define PCP_ERR "pcp.err"
#define PCP_MLP "pcp.mlp"
#define PCP_RCL "pcp.rcl"
#define PCP_KNN "pcp.knn"
#define PCP_XMP "pcp.xmp"
#define PCP_XSV "pcp.xsv"
#define PCP_XLD "pcp.xld"
#define PCP_XNN "pcp.xnn"
#define PCP_BAG "pcp.bag"
#define PCP_ADA "pcp.ada"
#define PCP_SVM "pcp.svm"
#define PCP_LIN "pcp.lin"
#define PCP_XTR "pcp.xtr"
#define PCP_XTE "pcp.xte"
#define PCP_LVM "pcp.lvm"
#define PCP_RNK "pcp.rnk"
#define PCP_SET "pcp.set"
#define PCP_PLC "pcp.plc"
#define PCP_PPC "pcp.ppc"
#define PCP_PQC "pcp.pqc"
#define PCP_XPL "pcp.xpl"
#define PCP_XPQ "pcp.xpq"
#define PCP_EMP "pcp.emp"
#define PCP_BEE "pcp.bee"
#define PCP_XSM "pcp.xsm"
#define PCP_MSL "pcp.msl"
#define PCP_XSL "pcp.xsl"
#define PCP_XSF "pcp.xsf"
#define PCP_CLX ".clx"
/*
Standard filename length in PCP.
*/
#define PCP_FLEN 30
/*
In certain display functions, up to PCP_DISP_FLEN characters of a
filename can be displayed.
*/
#define PCP_DISP_FLEN 30
/*
The standard length of display line in PCP.
*/
#define PCP_QLEN 78
/*
The longest allowed user message in PCP.
*/
#define PCP_LONG_LEN 1000
/*
The long line.
*/
#define PCP_LQLEN 100
/*
Blank padding used to build the empty display lines below. Adjacent
string literals are concatenated by the compiler, so the width of
each line can be read off by counting the pieces instead of
individual spaces.
*/
#define PCP_PAD2 "  "
#define PCP_PAD10 PCP_PAD2 PCP_PAD2 PCP_PAD2 PCP_PAD2 PCP_PAD2
/*
Display macros - each PCP_QLEN wide, followed by a newline.
PCP_EMPTY_LINE is '|', 76 blanks (7 * 10 + 3 * 2), '|'. The blanks had
been collapsed to a single one, which made the line 3 columns wide
instead of PCP_QLEN.
*/
#define PCP_LINE "+----------------------------------------------------------------------------+\n"
#define PCP_XLINE "+----------------------------------------------------------------------------+\n"
#define PCP_EMPTY_LINE "|" PCP_PAD10 PCP_PAD10 PCP_PAD10 PCP_PAD10 PCP_PAD10 PCP_PAD10 PCP_PAD10 PCP_PAD2 PCP_PAD2 PCP_PAD2 "|\n"
/*
Wide versions of PCP_QLEN, PCP_LINE, PCP_EMPTY_LINE.
PCP_EMPTY_WLINE is '|', 98 blanks (9 * 10 + 4 * 2), '|'.
*/
#define PCP_WLEN 100
#define PCP_WLINE "+--------------------------------------------------------------------------------------------------+\n"
#define PCP_EMPTY_WLINE "|" PCP_PAD10 PCP_PAD10 PCP_PAD10 PCP_PAD10 PCP_PAD10 PCP_PAD10 PCP_PAD10 PCP_PAD10 PCP_PAD10 PCP_PAD2 PCP_PAD2 PCP_PAD2 PCP_PAD2 "|\n"
/*
Number of info columns in certain output files (.mlp, .xmp, ...).
*/
#define PCP_HFT 40
/*
Unassigned class ID/name.
PCP_UNASSIGNED is parenthesized so that the negative value always
expands as a single operand, whatever expression surrounds it.
*/
#define PCP_UNASSIGNED (-1)
#define PCP_UNASSIGNED_NAME "unassigned"
/*
Classification algorithm codes.
Each base classifier has a plain code; the PALG_BAG_* and
PALG_ADABOOST_* codes presumably select the bagging and Adaboost
ensemble variants of the same classifier (cf. PCP_UMSG_ENSEMBLE).
Note that only MLP, SVM and the linear discriminant have an Adaboost
code in this header.
*/
#define PALG_MLP 1 /* MLP */
#define PALG_BAG_MLP 2
#define PALG_SVM 3 /* SVM */
#define PALG_ADABOOST_MLP 4
#define PALG_ADABOOST_SVM 5
#define PALG_BAG_SVM 6
#define PALG_LIN 7 /* linear discriminant */
#define PALG_BAG_LIN 8
#define PALG_ADABOOST_LIN 9
#define PALG_KNN 10 /* k-NN */
#define PALG_BAG_KNN 11
#define PALG_PLC 12 /* parametric linear classifier */
#define PALG_BAG_PLC 13
#define PALG_PQC 14 /* parametric quadratic classifier */
#define PALG_BAG_PQC 15
/*
Dimension reduction algorithm codes.
The codes should be self-explanatory, except for PDR_RANDOM and
PDR_FILE. These are the 'algorithms' used to generate starting
points for EMAP dimension reduction. PDR_RANDOM, as the name
implies, consists of a pseudo-randomly generated feature extraction
mapping. PDR_FILE refers to the mapping read from a file provided by
the user.
SVD is only available if the number of vectors 'nv' is less than or
equal to the dimensionality of the input space 'd'; FLD and PCA are
only available otherwise, according to the following rules:
condition    available dimensionality reduction methods
nv <= d      SVD, no FLD, PCA
nv > d       all except SVD
*/
#define PDR_NONE 0
#define PDR_SVD 1
#define PDR_FISHER 2
#define PDR_PCA 3
#define PDR_EMAP 4
#define PDR_RANKING 5
#define PDR_FORWARD 6
#define PDR_BACKWARD 7
#define PDR_FLOAT_FORWARD 8
#define PDR_FLOAT_BACKWARD 9
#define PDR_PLUS_L_MINUS_R 10
#define PDR_BB 11
#define PDR_RANDOM 12
#define PDR_FILE 13
/*
Feature selection criteria for feature ranking methods. NOTE: all
criteria are `bigger is better'.
The codes are consecutive, starting at 1, and PCP_N_FSCRIT is their
count; keep it in sync when adding a criterion.
*/
#define PCP_FSEL_EUCLIDEAN 1 /* Euclidean distance, used for individual feature selection */
#define PCP_FSEL_PEARSON 2 /* Pearson corr. coefficient, used for individual feature selection */
#define PCP_FSEL_GOLUB 3 /* specialized distance described in Golub et al., Science vol. 286, Oct 15 1999 */
#define PCP_FSEL_KNN 4 /* 1-NN accuracy rate (100 - 1-NN error rate) */
#define PCP_FSEL_BAYES 5 /* Bayes accuracy estimate (100 - Bayes error rate) */
#define PCP_N_FSCRIT 5 /* the number of available feature ranking criteria */
/*
Feature selection criteria which apply to subset selection methods.
The codes are consecutive, starting at 1, and PCP_N_SUB_FSCRIT is
their count.
*/
#define PCP_FSUB_KNN 1 /* 1-NN accuracy rate */
#define PCP_FSUB_IN_IN 2 /* inter-intra distance */
#define PCP_FSUB_BAYES 3 /* Bayes accuracy estimate */
#define PCP_N_SUB_FSCRIT 3 /* the number of available feature subset selection criteria. */
/*
Human-readable names of the dimension reduction methods. Each PDS_*
string pairs by name with the PDR_* code of the same suffix.
*/
#define PDS_NONE "none"
#define PDS_SVD "Singular Value Decomposition"
#define PDS_FISHER "Fisher's linear discriminant"
#define PDS_PCA "Principal Component Analysis"
#define PDS_RANKING "feature ranking"
#define PDS_EMAP "EMAP"
#define PDS_FORWARD "forward selection"
#define PDS_BACKWARD "backward elimination"
#define PDS_FLOAT_FORWARD "forward floating search"
#define PDS_FLOAT_BACKWARD "backward floating search"
/*
Human-readable names of the feature selection criteria.
PCP_SCRIT_BAYES used to read "Bayer error"; the criterion is the
Bayes error (cf. PCP_FSEL_BAYES, PCP_FSUB_BAYES), so the typo in
this user-visible string is corrected.
*/
#define PCP_SCRIT_EUCLIDEAN "Euclidean"
#define PCP_SCRIT_PEARSON "Pearson"
#define PCP_SCRIT_GOLUB "Golub"
#define PCP_SCRIT_KNN "1-NN error rate"
#define PCP_SCRIT_BAYES "Bayes error"
#define PCP_SCRIT_IN_IN "inter-intra distance"
/*
NOTE(review): undocumented; the names suggest the 'L' and 'R'
parameters of a feature selection search (cf. PDR_PLUS_L_MINUS_R) -
confirm against the callers.
*/
#define PCP_FSEL_LPAR 10
#define PCP_FSEL_RPAR 5
/*
Whether or not to replace the existing data set.
*/
#define P_DO_NOT_REPLACE 0
#define P_REPLACE 1
/*
Data transformation types. These are the values accepted as 'ttype'
by p_transform().
*/
#define P_NORMALIZE 1
#define P_MAP 2
/*
Launch Bayes error estimation for TDS.
'errc' is output error code. If 'debug' is not 0, send debugging
information to 'pcp.dbg' file.
For 'approximate_l_mode' == 1, use approximate L mode.
NOTE: the 'debug' parameter has the same name as the global 'debug'
declared in this header and hides it inside the function.
*/
void p_bayes(int *errc, int approximate_l_mode, int debug);
/*
Display Bayes error estimation results.
'errc' is presumably the output error code and 'xname' the name of
the offending file in case of a file error, as for the other p_*()
functions in this header - confirm against the implementation.
*/
void p_disp_bayes(int *errc, char **xname);
/*
Read one character from keyboard, no echo. Return it in 'key'.
NOTE(review): the meaning of the int return value is not documented
here (presumably a status/error code) - confirm against the
implementation.
*/
int read_keyboard(int *key);
/*
Wait for any character, no echo.
*/
void pwait(void);
/*
In-place float sorting function based on algorithm by
R. Sedgewick. In my experience, it is about 40% faster than the C
library function qsort(). The sedgesort() function which implements
the algorithm is downloaded from
http://www.yendor.com/programming/sort/. ljb, 10/14/2001
BIG NOTE: 'vector' needs to have len+1 elements, instead of the
usual len. The first len elements are the numbers to be sorted. The
last element is used internally by sedgesort() function (which is
called by ssort()) as a 'sentinel'. This is obviously a restriction
on the part of finsort(); it means that the user must allocate one
more element for each vector to be sorted. If this is not acceptable,
use fsort(), which creates a new vector with the extra element.
*/
void finsort(float *vector, int len);
/*
Floating-point sorting function. The function allocates space for
the sorted vector. The input 'vector' is not modified.
Return (float *) in case of error and set errno.
NOTE(review): the value returned on error appears to be missing from
the sentence above (presumably a null pointer), and ownership of the
returned vector is not stated (presumably the caller frees it) -
confirm against the implementation.
*/
float *fsort(float *vector, int len);
/*
Floating-point sorting function. hsort() uses modified version of
heapsort() to sort 'm' smallest values in a vector. This makes sense
if 'm' is considerably smaller than the length of the vector;
otherwise, sedgesort() is faster.
Note that, unlike the other sorting functions, 'len' and 'm' are
passed by pointer.
*/
void hsort(float *vector, int *len, int *m);
/*
In-place integer sorting function based on algorithm by
R. Sedgewick.
BIG NOTE: 'vector' needs to have len+1 elements. The first len
elements are the numbers to be sorted. The last element is used
internally by sedgesort() function (which is called by ssort()) as a
'sentinel'.
*/
void intsort(int *vector, int len);
/*
Interface to svd(), a linear transformation of high-dimensional
vectors using Singular Value Decomposition (SVD) of a covariance
matrix (a variant of Karhunen-Loeve transform).
In case of error, set 'errc'. In case of file error, set the file
name in 'xname'.
Error codes: 1. this function assumes that the rows of 'input' are
high-dimensional, that is typically nvec << d. If nvec >= d, set
'errc' to PERR_INC_DIM.
NOTE(review): 'input', 'nvec' and 'd' are not parameters of this
function; presumably they refer to the current data set. The
'nvec >= d' condition also differs from svd_transform(), which
accepts nv == d - confirm which bound the implementation enforces.
*/
void p_svd(int *errc, char **xname, int dbg);
/*
Feature extraction using SVD. For input 'nv' by 'd' data matrix 'x',
returns linear transformation matrix 'd' by 'nv'. It is assumed that
'nv' is <= 'd'.
If nv is > d, return NULL. In case of failure to iterate in eigen(),
return NULL and set errc to LERR_ITMAX. In case of malloc() error,
return NULL and set errc to the corresponding errno.
LERR_ITMAX is not defined in this header.
*/
float **svd_transform(float **x, int nv, int d, int *errc);
/*
Implementation of EMAP functionality (linear feature transformation
which minimizes Bayes error estimate in the transformed space). The
function computes the transformation matrix using the training data
set, and saves it in a file.
In case of error, set 'errc'. If 'errc' is a file access error,
'xname' is the name of the file associated with the error. If 'dbg'
is > 0, send debugging information to the debug file.
*/
void p_emap(int *errc, char **xname, int dbg);
/*
Intermediate data loading function.
'errc' and 'xname' are presumably used as in init_load() - confirm
against the implementation.
*/
void p_load(int *errc, char **xname);
/*
Startup data load. Reads the status file, then loads data from input
files into the global C data structures 'teds' and 'tds'.
In case of success, set 'errc' to 0. Otherwise, set errc to error
code. If 'errc' is a file-related error code, 'xname' is the name of
the offending file.
*/
void init_load(int *errc, char **xname);
/*
Save status of 'test_dataset', 'training_dataset' in 'fname'.
NOTE(review): the meaning of the int return value is not documented
here (presumably 0 on success, error code otherwise) - confirm.
*/
int save_sts(char *fname, struct dataset *test_dataset, struct dataset *training_dataset);
/*
Copy data set.
NOTE(review): source and destination of the copy are not parameters;
presumably they are chosen interactively - confirm.
*/
void p_copy(int *errc, char **xname);
/*
Perform multiple cross-validation learning experiments.
Each experiment consists of the following cross-validation test:
train the classifier using 'method' on a pseudo-randomly chosen
subset of the training data set, and test it on the remainder of the
dataset; then choose another training/test partition, repeat the
test, etc. This is repeated 'nxval' times, where 'nxval' is the
number of cross-validation subsets.
'method' is presumably one of the PALG_* codes, and 'nxval' (not a
parameter) is presumably obtained from the user - confirm against the
implementation.
*/
void p_xlearn(int method, int *errc, char **xname, int dbg);
/*
Transform data set(s) using transformation 'ttype'. The available
values for ttype are P_NORMALIZE and P_MAP. Optionally copy the
transformed data set(s) into the current data set.
*/
void p_transform(int ttype, int *errc, char **xname);
/*
Remove datasets (used in case of error).
*/
void remove_datasets(void);
/*
NOTE(review): con() and coff() are undocumented. The names suggest
switching the terminal color on and off (cf. PCP_VT100_COLOR) -
confirm against the implementation.
*/
void con(void);
void coff(void);
/*
Select optimal feature subset in TDS. The function collects
parameters from the user (output file name and criterion), computes
the optimal feature subset, and saves it in the output file.
*/
void p_fselect(int *errc, char **xname, int dbg);
/*
Display the ranking of features in a user-defined file.
*/
void p_fdisp(int *errc, char **xname, int dbg);
/*
Extract and save a user-defined subset of features from current data
sets. Optionally, replace the current data sets with the subset.
*/
void p_f_subset(int *errc, char **xname, int dbg);
/*
NOTE(review): eb() is undocumented in this header; its purpose cannot
be determined from the declaration alone.
*/
void eb(int *errc);
/*
Collect input parameters from the user, call the parametric linear
classifier function lin_learn() on TDS, save the classifier and
display results of TDS classification.
In case of error, return error code and, in case of file error,
store the offending file name in 'xname'.
*/
int p_lin_learn(char **xname);
/*
Collect input parameters from the user, call the parametric
quadratic classifier learning function pqc_learn() on TDS, save the
classifier and display results of TDS classification.
In case of error, return error code and, in case of file error,
store the offending file name in 'xname'.
*/
int p_pqc_learn(char **xname);
/*
Classify test data set using parametric quadratic classifier stored
in a file. The function obtains input parameters from the user,
calls dataset_pqc_predict(), and displays classification results.
In case of success, return 0. In case of failure, return error
code. In case of file access error, return the relevant file name in
'xname'.
*/
int p_pqc_predict(char **xname);
/*
Collect input parameters from the user and call lind_learn() which
performs the training.
In case of failure, set 'errc'. If error is file access error, set
'xname' to the name of the file which triggered the error.
Note that 'dbg' is passed by pointer here, unlike in p_emap() and
p_svd().
*/
void p_lind_learn(int *errc, char **xname, int *dbg);
/*
Classify test data set using linear classifier 'method' in a
file. 'method' can be PALG_LIN/PALG_BAG_LIN for linear discriminant
classifier, or PALG_PLC/PALG_BAG_PLC for parametric linear
classifier. The function obtains input parameters from the user,
calls lin_predict(), and displays classification results.
In case of success, return 0. In case of failure, return error
code. In case of file access error, return the relevant file name in
'xname'.
*/
int p_lin_predict(int method, char **xname);
/*
Classify test set using k-NN method, display and save results.
'errc', 'xname' and 'dbg' are presumably used as in p_knn_bagging() -
confirm against the implementation.
*/
void p_knn(int *errc, char **xname, int *dbg);
/*
Accept input parameters and pass them to the k-NN bagging function.
In case of error, set 'errc'. If error is file access error, set
'xname'.
*/
void p_knn_bagging(int *errc, char **xname, int *dbg);
/*
Accept input parameters from the user and pass them to MLP learning
function mlp_learn().
In case of error, set 'errc'. If error is file access error, set
'xname'.
*/
void p_mlp_learn(int *errc, char **xname, int *dbg);
/*
NOTE(review): p_mlp_predict() is undocumented. By analogy with
p_svm_predict() it presumably classifies the test data set using an
MLP stored in a file - confirm against the implementation.
*/
void p_mlp_predict(int *errc, char **xname);
/*
This function collects SVM learning parameters from the user, and
then calls svm_train() which performs the SVM training.
In case of successful completion, set 'errc' to 0. In case of error,
set errc to 'errno'. If error is file access error, set 'xname'.
*/
void p_svm_learn(int *errc, char **xname, int dbg);
/*
Classify test data set using SVM classifier stored in 'svm_fname'
(the file name provided by user), and display classification
results.
In case of successful completion, set 'errc' to 0. Otherwise, set
errc to 'errno'. In case of file error, store the name of the file
causing the error in 'xname'.
*/
void p_svm_predict(int *errc, char **xname);
/*
Save training data set in LIBSVM data format. See LIBSVM
documentation for the format description.
In case of success, set 'errc' to 0, otherwise set it to 'errno'. If
error is file access error, store the name of the offending file in
'xname'.
*/
void p_svm_save(int *errc, char **xname);
/*
Set the SVM parameters we decided to keep as defaults.
NOTE(review): 'svm_parameters' is an untyped pointer; presumably it
points to the LIBSVM parameter structure and is declared void * to
keep the LIBSVM header out of pcp.h - confirm.
*/
void svm_param_defaults(void *svm_parameters);
/*
Compute max. feasible value of nu for NU-SVM. Use Proposition 3 in
Section 6 of
http://www.csie.ntu.edu.tw/~cjlin/papers/nusvmtutorial.pdf.
NOTE(review): 'nd' and 'c' are undocumented; presumably 'nd' holds
the number of samples per class and 'c' the number of classes -
confirm against the implementation.
*/
float get_svm_nu_max(int *nd, int c);
/*
Compute max. feasible value of nu for `nxval' NU-SVM
cross-validation experiments. The function actually computes (tight)
lower bound on nu, assuming all subsets are of the size
(nxval-1)*(size/nxval).
*/
float get_svm_nu_xmax(int *nd, int c, int nxval);
/*
Optimize SVM parameters using simplex.
NOTE(review): this description is the same as that of
pcp_svm_simplex(); presumably pcp_xpar() is the interactive entry
point which dispatches to the grid or simplex search - confirm.
Note the parameter order (errc, dbg, xname), which differs from most
other functions in this header.
*/
void pcp_xpar(int *errc, int dbg, char **xname);
/*
SVM model selection using grid (i.e., exhaustive) search.
*/
void pcp_svm_grid(int *errc, int dbg, char **xname);
/*
NOTE(review): p_disp() is undocumented. Presumably it displays the
menu identified by 'menu_id' (one of the PCP_MENU_* codes); the
meaning of 'size' is not visible here - confirm.
*/
void p_disp(int menu_id, int size);
/*
Optimize SVM parameters using Simplex algorithm.
*/
void pcp_svm_simplex(int *errc, int dbg, char **xname);
/*
Multi-Layer Perceptron model selection. The function chooses optimal
number of hidden nodes and number of iterations. It is assumed that
the MLP has one hidden layer.
*/
void pcp_mlp_xpar(int *errc, int dbg, char **xname);
/*
k-NN model selection. The function chooses optimal number of nearest
neighbors.
*/
void pcp_knn_xpar(int *errc, int dbg, char **xname);
#endif
/* syntax highlighted by Code2HTML, v. 0.9.1 */