/*
File name: xpar.c
Created by: Ljubomir Buturovic
Created: 02/11/2005
Purpose: find optimal parameters minimizing cross-validation error
of a classifier.
*/
/*
Copyright 2004 Ljubomir J. Buturovic
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation files
(the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#include <stdlib.h>
#include <float.h>
#include <errno.h>
#include <unistd.h>
#include <math.h>
#include <string.h>
#include "xpar.h"
#include "lau.h"
#include "lmat.h"
#include "pau.h"
#include "pcp.h"
#include "simplex.h"
#include "fselect.h"
#include "xlearn.h"
#include "pcl_svm.h"
#include "svm.h"
#include "cda.h"
#include "emap.h"
#include "mlp.h"
#include "knn.h"
static char rcsid[] = "$Id: xpar.c,v 1.78 2006/05/24 05:04:19 ljubomir Exp $";
/*
Save optimal feature subsets computed by x_fsel().
Return -1 and set errno in case of file error.
*/
static int save_fsel(int ***fsel, struct dataset *dset, int idr, int nexp, int nxval)
{
int i;
int j;
int k;
int idx;
int status;
FILE *fptr;
status = 0;
fptr = fopen(PCP_XSL, "w");
if (fptr)
{
for (i = 0; i < idr; i++)
{
for (j = 0; j < nexp; j++)
for (k = 0; k < nxval; k++)
fprintf(fptr, "%d\t", fsel[j][k][i]+1);
if (dset->alab)
{
for (j = 0; j < nexp; j++)
for (k = 0; k < nxval; k++)
{
idx = fsel[j][k][i];
fprintf(fptr, "%s\t", dset->alab[idx]);
}
}
fprintf(fptr, "\n");
}
status = fclose(fptr);
}
else
status = -1;
return status;
}
int ***fsel_free(int ***fsel, int nexp, int nxval)
{
int i;
int j;
if (fsel)
{
for (i = 0; i < nexp; i++)
{
for (j = 0; j < nxval; j++)
free(fsel[i][j]);
free(fsel[i]);
}
free(fsel);
}
return (int ***) 0;
}
void xpar_free(struct xpar_crit_parameters *xpar_parameters)
{
free(xpar_parameters->options);
fsel_free(xpar_parameters->fsel, xpar_parameters->nexp, xpar_parameters->nxval);
free(xpar_parameters);
}
/*
Log cross-validation subset generated in experiment `ex',
cross-validation subset `xval_idx'.
The subsets generated by x_fsel() and xpar_func() must be identical.
*/
void log_subset(FILE *fdbg, char *func, struct dataset *learning_dset, int ex, int xval_idx)
{
int jdx;
int ndx;
jdx = learning_dset->d;
ndx = learning_dset->nv;
fprintf(fdbg, "%s; experiment: %d; xval: %d; learning_dset->x[0][0]: %12.5g; learning_dset->x[%d][%d]: %12.5g\n",
func, ex, xval_idx, learning_dset->x[0][0], ndx-1, jdx-1, learning_dset->x[ndx-1][jdx-1]);
fflush(fdbg);
}
/*
Compute optimal feature subsets in `dset' for `nexp' experiments and
`nxval' cross-validation subsets. The feature selection chooses
optimal `idr' features using `dr_method' and `fscrit' criterion. If
`normalize' is 1, the data is optionally normalized first.
This function is used to improve (significantly) performance of
Model Selection functionality. Model Selection repeats
cross-validation estimation for various values of SVM classifier
parameters. This process optionally includes (a very time consuming)
dimensionality reduction for each experiment and each
cross-validation subset. However, there is no need to execute this
repeatedly for different SVM parameter values, since the inputs are
identical, and therefore the results will be identical. It is
sufficient to compute the chosen feature subsets once, and then
reuse them for subsequent cross-validation experiments. That is the
job of x_fsel().
Optionally write intermediate results in `fname'.
The function returns array `fsel'. It contains selected feature
subsets for all `nexp' experiments and `nxval' cross-validation
subsets. For example, fsel[2][3] is the list of features selected in
experiment 2, cross-validation subset 3.
In case of failure, return NULL and set `errc'.
*/
static int ***x_fsel(struct dataset *dset, int nexp, int nxval, int normalize,
int dr_method, int idr, int fscrit, char *fname, FILE *fdbg,
int *errc)
{
int status;
int i;
int j;
int k;
int idx;
int *findex;
int *vfx;
int **sxc;
int **lxc;
int ***fsel;
FILE *fptr;
struct dataset *learning_dset;
struct dataset *uset;
status = 0;
fptr = (FILE *) 0;
fsel = malloc(nexp*sizeof(int **));
if (fsel)
{
for (i = 0; (i < nexp) && !status; i++)
{
fsel[i] = malloc(nxval*sizeof(int *));
if (!fsel[i])
{
status = -1;
*errc = errno;
}
}
if (!status && fname)
{
fptr = fopen(fname, "w");
if (!fptr)
{
status = -1;
*errc = errno;
}
}
for (i = 0; (i < nexp) && !status; i++)
{
status = xpart(dset->c, dset->nd, nxval, &sxc, &lxc);
if (!status)
{
for (j = 0; (j < nxval) && !status; j++)
{
status = xset(dset, lxc, sxc, j, &learning_dset, (struct dataset **) 0, (FILE *) 0);
if (!status)
{
if (normalize)
status = normalize_attributes(learning_dset, (struct dataset *) 0);
if (!status)
{
if (fdbg)
log_subset(fdbg, "x_fsel()", learning_dset, i, j);
uset = select_subset(learning_dset, idr, dr_method, fscrit, &findex, 0, errc);
dataset_free(uset);
if (*errc == 0)
{
vfx = ivec_clone(findex, idr);
free(findex);
fsel[i][j] = vfx;
if (fname)
{
for (k = 0; k < idr; k++)
{
idx = vfx[k];
if (dset->alab)
fprintf(fptr, "%s\t", dset->alab[idx]);
else
fprintf(fptr, "%d\t", idx+1);
}
fprintf(fptr, "\n");
fflush(fptr);
}
}
else
fsel = (int ***) 0;
}
else
fsel = (int ***) 0;
dataset_free(learning_dset);
}
else
fsel = (int ***) 0;
}
mx_free((void **) sxc, dset->c);
mx_free((void **) lxc, dset->c);
}
}
if (fname)
{
status = fclose(fptr);
if (status != 0)
{
*errc = errno;
fsel = fsel_free(fsel, nexp, nxval);
}
}
}
else
{
status = -1;
*errc = errno;
}
if (!status)
{
status = save_fsel(fsel, dset, idr, nexp, nxval);
if (status == -1)
{
*errc = errno;
fsel = fsel_free(fsel, nexp, nxval);
}
}
return fsel;
}
/*
Feature extraction equivalent of x_fsel(): compute optimal feature
mappings of `dset' for `nexp' experiments and `nxval'
cross-validation subsets.
The function returns array `fext'. It contains optimal feature
mappings (i.e., transformation matrices) for all `nexp' experiments
and `nxval' cross-validation subsets. For example, fext[2][3] is the
feature transformation matrix selected in experiment 2,
cross-validation subset 3.
In case of failure, return NULL and set `errc'.
*/
static float ****x_fext(struct dataset *dset, int nexp, int nxval, int normalize,
int dr_method, int idr, int *errc)
{
int status;
int i;
int j;
int idx;
int **sxc;
int **lxc;
float **map;
float ****fext;
struct dataset *learning_dset;
status = 0;
map = (float **) 0;
fext = malloc(nexp*sizeof(float ***));
if (fext)
{
for (i = 0; (i < nexp) && !status; i++)
fext[i] = malloc(nxval*sizeof(float **));
for (i = 0; (i < nexp) && !status; i++)
{
status = xpart(dset->c, dset->nd, nxval, &sxc, &lxc);
if (!status)
{
for (j = 0; (j < nxval) && !status; j++)
{
status = xset(dset, lxc, sxc, j, &learning_dset, (struct dataset **) 0, (FILE *) 0);
if (!status)
{
if (normalize)
status = normalize_attributes(learning_dset, (struct dataset *) 0);
if (!status)
{
if (dr_method == PDR_SVD)
map = svd_transform(learning_dset->x, learning_dset->nv, learning_dset->d, errc);
else if ((dr_method == PDR_PCA) || (dr_method == PDR_FISHER))
{
if (dr_method == PDR_PCA)
map = pca(learning_dset, errc);
else if (dr_method == PDR_FISHER)
map = fld(learning_dset, errc);
}
else if (dr_method == PDR_EMAP)
{
idx = ivec_min(learning_dset->nd, learning_dset->c)-1;
map = emap(learning_dset, idr, 1, idx, errc);
}
if (map)
fext[i][j] = map;
else
fext = (float ****) 0;
}
else
fext = (float ****) 0;
dataset_free(learning_dset);
}
else
fext = (float ****) 0;
}
mx_free((void **) sxc, dset->c);
mx_free((void **) lxc, dset->c);
}
}
}
return fext;
}
/*
In case of two classes, add confusion matrix to the model selection file.
*/
static void save_cmx(FILE *fptr, int c, float sens, float spec, float ppv, float npv)
{
if (c == 2)
fprintf(fptr, "\t%7.2f\t%7.2f\t%7.2f\t%7.2f", sens, spec, ppv, npv);
fprintf(fptr, "\n");
}
/*
Save the criterion function value and parameters for C-SVM.
*/
static int save_C_val(float ecrit, float *fvec, int n, int iteration, int c,
int kernel_type, float sens, float spec, float ppv, float npv)
{
int i;
int status;
FILE *fptr;
status = 0;
fptr = fopen(PCP_MSL, "a");
if (fptr)
{
fprintf(fptr, "%7.2f\t", ecrit);
for (i = 0; i < n-1; i++)
fprintf(fptr, "%12.5g\t", pow(10, fvec[i]));
fprintf(fptr, "%12.5g\t%d", pow(10, fvec[n-1]), iteration);
save_cmx(fptr, c, sens, spec, ppv, npv);
status = fclose(fptr);
}
else
status = -1;
return status;
}
/*
Save the criterion function value and parameters for NU-SVM.
*/
static int save_nu_val(float ecrit, float *fvec, int n, int iteration, int c,
int kernel_type, float sens, float spec, float ppv, float npv)
{
int status;
FILE *fptr;
status = 0;
fptr = fopen(PCP_MSL, "a");
if (fptr)
{
fprintf(fptr, "%7.2f\t", ecrit);
fprintf(fptr, "%12.5g\t", fvec[0]); /* nu */
fprintf(fptr, "%12.5g\t%d", pow(10, fvec[n-1]), iteration); /* gamma */
save_cmx(fptr, c, sens, spec, ppv, npv);
status = fclose(fptr);
}
else
status = -1;
return status;
}
/*
Save the criterion function value and parameters.
*/
static int save_val(float ecrit, float *fvec, int n, int iteration, int c,
float sens, float spec, float ppv, float npv)
{
int i;
int status;
FILE *fptr;
status = 0;
fptr = fopen(PCP_MSL, "a");
if (fptr)
{
fprintf(fptr, "%7.2f\t", ecrit);
for (i = 0; i < n-1; i++)
fprintf(fptr, "%12.5g\t", fvec[i]);
fprintf(fptr, "%12.5g\t%d", fvec[n-1], iteration);
save_cmx(fptr, c, sens, spec, ppv, npv);
status = fclose(fptr);
}
else
status = -1;
return status;
}
/*
Create `xpar_crit_parameters' structure for `method'.
*/
struct xpar_crit_parameters *init_spar(int method, struct dataset *dset, FILE *fdbg)
{
struct xpar_crit_parameters *xpar_params;
xpar_params = calloc(1, sizeof(struct xpar_crit_parameters));
if (xpar_params)
{
xpar_params->dset = dset;
xpar_params->outdev = (FILE *) 0;
xpar_params->fdbg = fdbg;
xpar_params->vid = 0;
xpar_params->eval = FLT_MAX;
xpar_params->method = method;
xpar_params->nmodels = 1;
}
return xpar_params;
}
/*
Compute value of xpar() criterion function. xpar() is a wrapper
around xlearn(), which computes cross-validation error rate for a
given dimension reduction/classifier combination.
xpar_func() uses rand() to partition input dataset into learning and
validation subsets for cross-validation. The pseudo-random number
generator is initialized using srand(parameters->seed) function
call. Therefore, to reproduce the results for a given dataset, you
need to give the same parameters->seed value.
In case of error, set 'errc'. The errors are malloc() and xlearn()
errors.
*/
float xpar_func(float *fvec, int n, int iteration, void *parameters, int *errc)
{
int idx;
int method;
int dr_method;
int idr;
int fscrit;
int normalize;
int nmodels;
int status;
int vid;
int error_count;
int ex;
int nexp;
int bag_size;
int c;
int nxval;
int i;
int t_p;
int f_n;
int f_p;
int t_n;
int kernel_type;
float ecrit;
float amce;
float xmce;
float sens;
float spec;
float ppv;
float npv;
float nu_max;
int *findex;
int *ccer;
int *tccer;
int **sxc;
int **lxc;
float **fext;
FILE *outdev;
FILE *fdbg;
struct xpar_crit_parameters *xpar_parameters;
struct dataset *dset; /* training data set */
struct dataset *learning_dset;
struct dataset *validation_dset;
struct dataset *l_dset; /* learning/validation sets after norm./DR */
struct dataset *v_dset;
unsigned int seed;
status = 0;
error_count = -1;
xpar_parameters = (struct xpar_crit_parameters *) parameters;
method = xpar_parameters->method;
fdbg = xpar_parameters->fdbg;
nexp = xpar_parameters->nexp;
dset = xpar_parameters->dset;
nxval = xpar_parameters->nxval;
/*
NU-SVM parameter optimization is actually constrained optimization
problem, since nu must be in [0, nu_max] (see get_svm_nu_max() for
definition of nu_max). Simplex does not support constrained
optimization.
For now, do this: if nu violates the constraint, return a large
value. That should discourage Simplex from going in that
direction.
*/
idx = 0; /* idx = 1 means constraint violation */
if ((method == PALG_SVM) && ((struct svm_parameter *) xpar_parameters->options)->svm_type == NU_SVC)
{
nu_max = get_svm_nu_xmax(dset->nd, dset->c, nxval);
/*
Check if nu is outside of range.
*/
if ((fvec[0] < 0) || (fvec[0] > nu_max))
idx = 1;
}
if (idx)
ecrit = FLT_MAX;
else
{
dr_method = xpar_parameters->dr_method;
idr = xpar_parameters->idr;
fscrit = xpar_parameters->fscrit;
normalize = xpar_parameters->normalize;
nmodels = xpar_parameters->nmodels;
vid = xpar_parameters->vid;
seed = xpar_parameters->seed;
outdev = xpar_parameters->outdev;
c = dset->c;
fdbg = xpar_parameters->fdbg;
/*
Compute cross-validation error rates for the given learning
method and parameters.
srand() ensures reproducible results. This call guarantees
that each invocation of xpar_func() with identical input
arguments will produce identical result.
*/
srand(seed);
tccer = calloc(c, sizeof(int)); /* class-conditional error counts for one experiment */
ecrit = 0.0;
sens = 0.0;
spec = 0.0;
ppv = 0.0;
npv = 0.0;
for (ex = 0; (ex < nexp) && !status; ex++)
{
amce = 0.0;
status = xpart(c, dset->nd, nxval, &sxc, &lxc);
/*
lxc[i][j] is number of class i vectors in cross-validation
subset j.
sxc[i] is list of class i vectors with arranged so that
cross-validation subsets are in sequence. For example, if
there are 3 cross-validation subsets with 10 vectors each,
sxc[i][0..9] is list of class i vectors in subset 0,
sxc[i][10..19] is list of class i vectors in subset 1, and
sxc[i][20..29] is list of class i vectors in subset 2.
*/
if (!status)
{
ivec_set(tccer, c, 0);
for (idx = 0; (idx < nxval) && !status; idx++)
{
status = xset(dset, lxc, sxc, idx, &learning_dset, &validation_dset, fdbg);
/*
NOTE: in November 2002 decided to fix the size of
each bagging set to the cardinality of the
training data set. Perhaps revisit.
*/
bag_size = learning_dset->nv;
if (method == PALG_SVM)
{
if (((struct svm_parameter *) xpar_parameters->options)->svm_type == C_SVC)
((struct svm_parameter *) xpar_parameters->options)->C = pow(10, fvec[0]);
else
((struct svm_parameter *) xpar_parameters->options)->nu = fvec[0];
((struct svm_parameter *) xpar_parameters->options)->gamma = pow(10, fvec[1]);
((struct svm_parameter *) xpar_parameters->options)->verbose = 0;
}
else if (method == PALG_MLP)
{
((struct mlp_options *) xpar_parameters->options)->npl[0] = fvec[0];
((struct mlp_options *) xpar_parameters->options)->itmax = fvec[1];
}
else if (method == PALG_KNN)
((struct knn_options *) xpar_parameters->options)->k = fvec[0];
l_dset = (struct dataset *) 0;
v_dset = (struct dataset *) 0;
/*
If feature subsets or feature mappings are already
provided, reduce dimensionality of the datasets
and signal to xlearn() not to do any feature
selection.
*/
if (xpar_parameters->fsel)
{
if (normalize)
status = normalize_attributes(learning_dset, validation_dset);
if (!status)
{
if (fdbg)
log_subset(fdbg, "xpar_func()", learning_dset, ex, idx);
findex = xpar_parameters->fsel[ex][idx];
l_dset = dataset_select(learning_dset, findex, idr);
if (l_dset)
{
v_dset = dataset_select(validation_dset, findex, idr);
if (v_dset)
error_count = xlearn(method, PDR_NONE, idr, fscrit,
0, l_dset, v_dset, seed, nmodels,
bag_size, xpar_parameters->options,
&ccer, outdev, fdbg, errc);
else
status = -1;
}
}
}
else if (xpar_parameters->fext)
{
if (normalize)
status = normalize_attributes(learning_dset, validation_dset);
if (!status)
{
fext = xpar_parameters->fext[ex][idx];
l_dset = dataset_map(learning_dset, idr, fext);
if (l_dset)
{
v_dset = dataset_map(validation_dset, idr, fext);
if (v_dset)
error_count = xlearn(method, PDR_NONE, idr, fscrit,
0, l_dset, v_dset, seed, nmodels,
bag_size, xpar_parameters->options,
&ccer, outdev, fdbg, errc);
else
status = -1;
}
}
}
else
error_count = xlearn(method, dr_method, idr, fscrit, normalize, learning_dset,
validation_dset, seed, nmodels, bag_size,
xpar_parameters->options, &ccer, outdev, fdbg, errc);
if (error_count >= 0)
{
for (i = 0; i < c; i++)
tccer[i] += ccer[i];
free(ccer);
amce += error_count;
}
else
status = -1;
dataset_free(validation_dset);
dataset_free(learning_dset);
dataset_free(v_dset);
dataset_free(l_dset);
}
mx_free((void **) sxc, c);
mx_free((void **) lxc, c);
if (!status)
{
xmce = 100.0*amce/dset->nv; /* error rate for this experiment */
viprint_line(4, 1, "Experiment %6d cross-validation error rate: "
"%6.2f%% ", ex+1, xmce);
ecrit += xmce;
if (c == 2)
{
/*
NOTE: sensitivity, specificity, positive and
negative predictive values (SSPN) are defined
based on a confusion matrix, which in turn is
defined for a single experiment. Here we have
multiple experiments, and thus multiple
confusion matrices (one per experiment). The
final SSPN are computed as the averages over
the experiments.
*/
f_p = tccer[1];
f_n = tccer[0];
t_p = dset->nd[0]-f_n;
t_n = dset->nd[1]-f_p;
sens += 100.0*t_p/dset->nd[0];
spec += 100.0*t_n/dset->nd[1];
if (t_p+f_p > 0)
ppv += 100.0*t_p/(t_p+f_p);
if (t_n+f_n > 0)
npv += 100.0*t_n/(t_n+f_n);
}
}
}
else
*errc = errno;
}
free(tccer);
if (!status)
{
ecrit = ecrit/nexp;
if (c == 2)
{
sens = sens/nexp;
spec = spec/nexp;
ppv = ppv/nexp;
npv = npv/nexp;
}
if (ecrit < xpar_parameters->eval)
{
xpar_parameters->eval = ecrit;
xpar_parameters->x1 = fvec[0];
xpar_parameters->x2 = fvec[1];
}
if (method == PALG_SVM)
{
kernel_type = ((struct svm_parameter *) xpar_parameters->options)->kernel_type;
if (((struct svm_parameter *) xpar_parameters->options)->svm_type == C_SVC)
status = save_C_val(ecrit, fvec, n, iteration, c, kernel_type,
sens, spec, ppv, npv);
else
status = save_nu_val(ecrit, fvec, n, iteration, c, kernel_type,
sens, spec, ppv, npv);
}
else
status = save_val(ecrit, fvec, n, iteration, c, sens, spec, ppv, npv);
if (status == -1)
*errc = errno;
if (iteration == 0)
viprint_line(6, 1, "Iteration %7d; vertex %7d; %s %7.2f%%", iteration, vid,
XPAR_FUNC_MSG, ecrit);
else
viprint_line(5, 1, "Iteration %7d; %s %7.2f%%", iteration, XPAR_FUNC_MSG, ecrit);
/*inverse_video();
fprintf(outdev, "\n");*/
}
else
ecrit = -1.0;
}
return ecrit;
}
int xtest_optimal_malloc(float **tds_x, int tds_nv, int d, float **teds_x,
int teds_nv, float ***x, float ***y, float **xmean,
float **std)
{
int status;
float *l_xmean;
float *l_std;
float **l_x;
float **l_y;
status = -1;
l_xmean = fmx_mean(tds_x, tds_nv, d);
if (l_xmean)
{
l_std = fmx_std(tds_x, tds_nv, d);
if (l_std)
{
l_x = fmx_clone(tds_x, tds_nv, d);
if (l_x)
{
l_y = fmx_clone(teds_x, teds_nv, d);
if (l_y)
{
status = 0;
*x = l_x;
*y = l_y;
*xmean = l_xmean;
*std = l_std;
}
}
}
}
return status;
}
/*
Apply optimal parameters to test data set, display and save results.
In case of success, set *errc to 0, otherwise set it to the error
code.
*/
void xtest_optimal(struct xpar_crit_parameters *xpar_parameters, int *errc, char **xname)
{
int status;
int i;
int d;
int idr;
int predicted_class;
int method;
int *nd;
float *xmean;
float *std;
float *output;
void *model;
float **target;
struct dataset *n_tds;
struct dataset *n_teds;
struct dataset *r_tds;
struct dataset *r_teds;
struct svm_node *svm_vector = (struct svm_node *) 0;
struct svm_problem *problem;
struct mlp_options *mlp_options;
struct knn_options *knn_options;
struct svm_parameter *svm_options;
FILE *rcl_fptr;
status = 0;
model = (void *) 0;
predicted_class = 0;
d = tds->d;
method = xpar_parameters->method;
/*
Apply normalization and/or dimensionality reduction.
*/
if (xpar_parameters->normalize)
{
nd = ivec_clone(tds->nd, tds->c);
n_tds = dataset_lt(d, tds->c, nd, tds->nv, (char **) 0, (float **) 0);
nd = ivec_clone(teds->nd, teds->c);
n_teds = dataset_lt(d, teds->c, nd, teds->nv, (char **) 0, (float **) 0);
status = xtest_optimal_malloc(tds->x, tds->nv, d, teds->x, teds->nv, &n_tds->x, &n_teds->x, &xmean, &std);
if (!status)
{
fmx_prenorm(n_tds->x, tds->nv, d, xmean, std);
/*
Test data set is normalized using mean/std. from training
data set.
*/
fmx_prenorm(n_teds->x, teds->nv, d, xmean, std);
}
else
*errc = errno;
}
else
{
n_tds = tds;
n_teds = teds;
}
if (!status)
{
if (xpar_parameters->dr_method != PDR_NONE)
{
idr = xpar_parameters->idr;
status = reduce_d(n_tds, n_teds, &r_tds, &r_teds, xpar_parameters->dr_method,
idr, xpar_parameters->fscrit, errc);
if (xpar_parameters->normalize)
{
dataset_free(n_tds);
dataset_free(n_teds);
}
}
else
{
idr = d;
r_tds = n_tds;
r_teds = n_teds;
}
if (!status)
{
if (method == PALG_SVM)
{
/*
Build the model, apply to the test data set.
*/
problem = create_problem(r_tds);
/*
Build probability model.
*/
svm_options = (struct svm_parameter *) xpar_parameters->options;
svm_options->probability = 1;
model = svm_train(problem, svm_options);
}
else if (method == PALG_MLP)
{
target = mlp_target(r_tds->c, r_tds->nd);
mlp_options = (struct mlp_options *) xpar_parameters->options;
inverse_video();
model = mlp_learn(mlp_options->opt_method, r_tds->x, r_tds->nv,
r_tds->nd, idr, target, mlp_options->nlayers,
mlp_options->npl, mlp_options->itmax,
mlp_options->range, mlp_options->eta,
mlp_options->mu, stdout, 0, (char *) 0, xpar_parameters->seed,
errc, xpar_parameters->fdbg);
}
if (model || (method == PALG_KNN))
{
if (!teds->prediction)
teds->prediction = malloc(teds->nv*sizeof(int));
rcl_fptr = fopen(PCP_RCL, "w");
if (rcl_fptr)
{
/*
For now, only SVM computes prediction strengths.
*/
if (method == PALG_SVM)
teds->ps = dmx_alloc(teds->nv, teds->c);
for (i = 0; (i < teds->nv) && !status; i++)
{
if (method == PALG_SVM)
{
svm_vector = create_svm_vector(r_teds->x[i], idr);
predicted_class = svm_predict_probability(model, svm_vector,
teds->ps[i])-1;
predicted_class = svm_predict((struct svm_model *) model, svm_vector)-1;
free(svm_vector);
}
else if (method == PALG_MLP)
{
output = mlp_output(model, r_teds->x[i]);
predicted_class = fvec_argmax(output, r_teds->c);
}
else if (method == PALG_KNN)
{
knn_options = (struct knn_options *) xpar_parameters->options;
predicted_class = knn(r_teds->x[i], 0, r_tds, xpar_parameters->x1,
knn_options->dist, errc, xpar_parameters->fdbg);
if (predicted_class == -1)
status = -1;
}
if (!status)
{
teds->prediction[i] = predicted_class;
write_rcl(rcl_fptr, i, teds, tds);
}
}
if (!status)
{
status = fclose(rcl_fptr);
if (!status)
{
printf("\n");
predict_disp(teds, 0, method);
if (method == PALG_SVM)
svm_destroy_model((struct svm_model *) model);
pwait();
}
}
}
else
{
*errc = errno;
status = -1;
if (xname)
*xname = strdup(PCP_RCL);
}
}
}
if (xpar_parameters->dr_method != PDR_NONE)
{
dataset_free(r_tds);
dataset_free(r_teds);
}
}
}
/*
Compute optimal feature transformations/subsets for all experiments
and all cross-validation subsets. We do it once at the beggining of
the simplex/grid search, and subsequently just reuse the results.
*/
int compute_dr(struct xpar_crit_parameters *xpar_parameters, int dr_method, int idr,
int fscrit, FILE *fdbg, int *errc)
{
int status;
status = 0;
if ((dr_method == PDR_SVD) || (dr_method == PDR_FISHER) || (dr_method == PDR_PCA) ||
(dr_method == PDR_EMAP))
{
xpar_parameters->fext = x_fext(tds, xpar_parameters->nexp, xpar_parameters->nxval,
xpar_parameters->normalize, dr_method,
idr, errc);
if (!xpar_parameters->fext)
status = -1;
}
else
{
xpar_parameters->fsel = x_fsel(tds, xpar_parameters->nexp, xpar_parameters->nxval,
xpar_parameters->normalize, dr_method,
idr, fscrit, PCP_XSF, fdbg, errc);
if (!xpar_parameters->fsel)
status = -1;
}
return status;
}
/*
Input model selection parameters common to all algorithms.
*/
void input_xpar(struct dataset *dset, struct xpar_crit_parameters *xpar_parameters)
{
int max_nxval;
int ex;
int dflt;
int i;
int dr_method;
int idr;
int fscrit;
char *msg;
max_nxval = ivec_min(dset->nd, dset->c);
xpar_parameters->nxval = input_nxval(stdin, stdout, max_nxval);
msg = malloc((PCP_QLEN+1)*sizeof(char));
xpar_parameters->nexp = input_nexp(msg);
free(msg);
xpar_parameters->seed = input_seed(stdin, stdout);
srand(xpar_parameters->seed);
dflt = 0;
ex = 1;
xpar_parameters->normalize = input_integer(stdin, stdout, PCP_UMSG_RAW,
PCP_QLEN, &dflt, &dflt, &ex);
ex = 0;
for (i = 0; i < dset->c; i++)
ex += (dset->nd[i]-1)/xpar_parameters->nxval+1;
dr_method = input_dr(stdout, dset->nv-ex, dset->d, dset->c, &idr, &fscrit);
xpar_parameters->dr_method = dr_method;
xpar_parameters->idr = idr;
xpar_parameters->fscrit = fscrit;
}
/*
Read MLP-specific model selection parameters.
*/
void input_xpar_mlp(int *nhl, int *nhh, int *hstep, int *nitl, int *nith, int *itstep)
{
int min_range;
int dflt;
char *msg;
msg = malloc((PCP_QLEN+1)*sizeof(char));
min_range = 1;
dflt = PCP_MLP_DFLT_NHL;
sprintf(msg, PCP_UMSG_MLP_MSEL1, dflt);
*nhl = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, (int *) 0);
min_range = *nhl;
dflt = 2*(*nhl);
sprintf(msg, PCP_UMSG_MLP_MSEL2, min_range, dflt);
*nhh = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, (int *) 0);
if (*nhh > *nhl)
{
min_range = 1;
dflt = (*nhh-*nhl)/PCP_MLP_DFLT_HSTP;
if (dflt == 0)
dflt = 1;
sprintf(msg, PCP_UMSG_MLP_MSEL3, min_range, dflt);
*hstep = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, (int *) 0);
}
else
*hstep = 1;
if (nitl)
{
min_range = 1;
dflt = PCP_MLP_DFLT_MIN;
sprintf(msg, PCP_UMSG_MLP_MSEL4, min_range, dflt);
*nitl = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, (int *) 0);
min_range = *nitl;
dflt = 2*min_range;
sprintf(msg, PCP_UMSG_MLP_MSEL5, min_range, dflt);
*nith = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, (int *) 0);
if (*nith > *nitl)
{
min_range = 1;
dflt = (*nith-*nitl)/PCP_MLP_DFLT_ISTP;
if (dflt == 0)
dflt = 1;
sprintf(msg, PCP_UMSG_MLP_MSEL3, min_range, dflt);
*itstep = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, (int *) 0);
}
else
*itstep = 1;
}
free(msg);
}
/*
Read k-NN-specific model selection parameters.
*/
void input_xpar_knn(struct dataset *dset, int nxval, int *kmin, int *kmax, int *kstep)
{
int i;
int min_range;
int max_range;
int mx;
int kval;
int dflt;
char *msg;
mx = 0;
msg = malloc((PCP_QLEN+1)*sizeof(char));
for (i = 0; i < dset->c; i++)
{
kval = dset->nd[i]-dset->nd[i]/nxval;
if ((i == 0) || (kval < mx))
mx = kval;
}
min_range = 1;
max_range = mx;
dflt = PCP_KNN_DFLT_KMIN;
sprintf(msg, PCP_UMSG_KNN_MSEL1, max_range, dflt);
*kmin = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, &max_range);
min_range = *kmin;
max_range = mx;
dflt = PCP_KNN_DFLT_KMAX;
if (dflt > max_range)
dflt = max_range;
sprintf(msg, PCP_UMSG_KNN_MSEL2, min_range, max_range, dflt);
*kmax = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, &max_range);
min_range = 1;
dflt = PCP_KNN_DFLT_KSTEP;
sprintf(msg, PCP_UMSG_KNN_MSEL3, dflt);
*kstep = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, (int *) 0);
free(msg);
}
syntax highlighted by Code2HTML, v. 0.9.1