/*
  File name: xpar.c
  Created by: Ljubomir Buturovic
  Created: 02/11/2005
  Purpose: find optimal parameters minimizing cross-validation error
  of a classifier.
*/

/*
  Copyright 2004 Ljubomir J. Buturovic

  Permission is hereby granted, free of charge, to any person
  obtaining a copy of this software and associated documentation files
  (the "Software"), to deal in the Software without restriction,
  including without limitation the rights to use, copy, modify, merge,
  publish, distribute, sublicense, and/or sell copies of the Software,
  and to permit persons to whom the Software is furnished to do so,
  subject to the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
*/

#include <stdlib.h>
#include <float.h>
#include <errno.h>
#include <unistd.h>
#include <math.h>
#include <string.h>
#include "xpar.h"
#include "lau.h"
#include "lmat.h"
#include "pau.h"
#include "pcp.h"
#include "simplex.h"
#include "fselect.h"
#include "xlearn.h"
#include "pcl_svm.h"
#include "svm.h"
#include "cda.h"
#include "emap.h"
#include "mlp.h"
#include "knn.h"

static char rcsid[] = "$Id: xpar.c,v 1.78 2006/05/24 05:04:19 ljubomir Exp $";

/*
  Save optimal feature subsets computed by x_fsel().
  
  Return -1 and set errno in case of file error.
*/
static int save_fsel(int ***fsel, struct dataset *dset, int idr, int nexp, int nxval)
{
  int  i;
  int  j;
  int  k;
  int  idx;
  int  status;
  FILE *fptr;

  status = 0;
  fptr = fopen(PCP_XSL, "w");
  if (fptr)
    {
      for (i = 0; i < idr; i++)
	{
	  for (j = 0; j < nexp; j++)
	    for (k = 0; k < nxval; k++)
	      fprintf(fptr, "%d\t", fsel[j][k][i]+1);
	  if (dset->alab)
	    {
	      for (j = 0; j < nexp; j++)
		for (k = 0; k < nxval; k++)
		  {
		    idx = fsel[j][k][i];
		    fprintf(fptr, "%s\t", dset->alab[idx]);
		  }
	    }
	  fprintf(fptr, "\n");
	}
      status = fclose(fptr);
    }
  else
    status = -1;
  return status;
}

int ***fsel_free(int ***fsel, int nexp, int nxval)
{
  int i;
  int j;

  if (fsel)
    {
      for (i = 0; i < nexp; i++)
	{
	  for (j = 0; j < nxval; j++)
	    free(fsel[i][j]);
	  free(fsel[i]);
	}
      free(fsel);
    }
  return (int ***) 0;
}

void xpar_free(struct xpar_crit_parameters *xpar_parameters)
{
  free(xpar_parameters->options);
  fsel_free(xpar_parameters->fsel, xpar_parameters->nexp, xpar_parameters->nxval);
  free(xpar_parameters);
}

/*
  Log cross-validation subset generated in experiment `ex',
  cross-validation subset `xval_idx'.

  The subsets generated by x_fsel() and xpar_func() must be identical.
*/
void log_subset(FILE *fdbg, char *func, struct dataset *learning_dset, int ex, int xval_idx)
{
  int jdx;
  int ndx;

  jdx = learning_dset->d;
  ndx = learning_dset->nv;
  fprintf(fdbg, "%s; experiment: %d; xval: %d; learning_dset->x[0][0]: %12.5g; learning_dset->x[%d][%d]: %12.5g\n",
	  func, ex, xval_idx, learning_dset->x[0][0], ndx-1, jdx-1, learning_dset->x[ndx-1][jdx-1]);
  fflush(fdbg);
}

/*
  Compute optimal feature subsets in `dset' for `nexp' experiments and
  `nxval' cross-validation subsets. The feature selection chooses
  optimal `idr' features using `dr_method' and `fscrit' criterion. If
  `normalize' is 1, the data is optionally normalized first.

  This function is used to improve (significantly) performance of
  Model Selection functionality. Model Selection repeats
  cross-validation estimation for various values of SVM classifier
  parameters. This process optionally includes (a very time consuming)
  dimensionality reduction for each experiment and each
  cross-validation subset. However, there is no need to execute this
  repeatedly for different SVM parameter values, since the inputs are
  identical, and therefore the results will be identical. It is
  sufficient to compute the chosen feature subsets once, and then
  reuse them for subsequent cross-validation experiments. That is the
  job of x_fsel().

  Optionally write intermediate results in `fname'.

  The function returns array `fsel'. It contains selected feature
  subsets for all `nexp' experiments and `nxval' cross-validation
  subsets. For example, fsel[2][3] is the list of features selected in
  experiment 2, cross-validation subset 3.

  In case of failure, return NULL and set `errc'.
*/
static int ***x_fsel(struct dataset *dset, int nexp, int nxval, int normalize,
		     int dr_method, int idr, int fscrit, char *fname, FILE *fdbg,
		     int *errc)
{
  int  status;
  int  i;
  int  j;
  int  k;
  int  idx;
  int  *findex;
  int  *vfx;
  int  **sxc;
  int  **lxc;
  int  ***fsel;
  FILE *fptr;
  struct dataset *learning_dset;
  struct dataset *uset;

  status = 0;
  fptr = (FILE *) 0;
  fsel = malloc(nexp*sizeof(int **));
  if (fsel)
    {
      for (i = 0; (i < nexp) && !status; i++)
	{
	  fsel[i] = malloc(nxval*sizeof(int *));
	  if (!fsel[i])
	    {
	      status = -1;
	      *errc = errno;
	    }
	}
      if (!status && fname)
	{
	  fptr = fopen(fname, "w");
	  if (!fptr)
	    {
	      status = -1;
	      *errc = errno;
	    }
	}
      for (i = 0; (i < nexp) && !status; i++)
	{
	  status = xpart(dset->c, dset->nd, nxval, &sxc, &lxc);
	  if (!status)
	    {
	      for (j = 0; (j < nxval) && !status; j++)
		{
		  status = xset(dset, lxc, sxc, j, &learning_dset, (struct dataset **) 0, (FILE *) 0);
		  if (!status)
		    {
		      if (normalize)
			status = normalize_attributes(learning_dset, (struct dataset *) 0);
		      if (!status)
			{
			  if (fdbg)
			    log_subset(fdbg, "x_fsel()", learning_dset, i, j);
			  uset = select_subset(learning_dset, idr, dr_method, fscrit, &findex, 0, errc);
			  dataset_free(uset);
			  if (*errc == 0)
			    {
			      vfx = ivec_clone(findex, idr);
			      free(findex);
			      fsel[i][j] = vfx;
			      if (fname)
				{
				  for (k = 0; k < idr; k++)
				    {
				      idx = vfx[k];
				      if (dset->alab)
					fprintf(fptr, "%s\t", dset->alab[idx]);
				      else
					fprintf(fptr, "%d\t", idx+1);
				    }
				  fprintf(fptr, "\n");
				  fflush(fptr);
				}
			    }
			  else
			    fsel = (int ***) 0;
			}
		      else
			fsel = (int ***) 0;
		      dataset_free(learning_dset);
		    }
		  else
		    fsel = (int ***) 0;
		}
	      mx_free((void **) sxc, dset->c);
	      mx_free((void **) lxc, dset->c);
	    }
	}
      if (fname)
	{
	  status = fclose(fptr);
	  if (status != 0)
	    {
	      *errc = errno;
	      fsel = fsel_free(fsel, nexp, nxval);
	    }
	}
    }
  else
    {
      status = -1;
      *errc = errno;
    }
  if (!status)
    {
      status = save_fsel(fsel, dset, idr, nexp, nxval);
      if (status == -1)
	{
	  *errc = errno;
	  fsel = fsel_free(fsel, nexp, nxval);
	}
    }
  return fsel;
}

/*
  Feature extraction equivalent of x_fsel(): compute optimal feature
  mappings of `dset' for `nexp' experiments and `nxval'
  cross-validation subsets.

  The function returns array `fext'. It contains optimal feature
  mappings (i.e., transformation matrices) for all `nexp' experiments
  and `nxval' cross-validation subsets. For example, fext[2][3] is the
  feature transformation matrix selected in experiment 2,
  cross-validation subset 3.

  In case of failure, return NULL and set `errc'.
*/
static float ****x_fext(struct dataset *dset, int nexp, int nxval, int normalize,
			int dr_method, int idr, int *errc)
{
  int   status;
  int   i;
  int   j;
  int   idx;
  int   **sxc;
  int   **lxc;
  float **map;
  float ****fext;
  struct dataset *learning_dset;

  status = 0;
  map = (float **) 0;
  fext = malloc(nexp*sizeof(float ***));
  if (fext)
    {
      for (i = 0; (i < nexp) && !status; i++)
	fext[i] = malloc(nxval*sizeof(float **));
      for (i = 0; (i < nexp) && !status; i++)
	{
	  status = xpart(dset->c, dset->nd, nxval, &sxc, &lxc);
	  if (!status)
	    {
	      for (j = 0; (j < nxval) && !status; j++)
		{
		  status = xset(dset, lxc, sxc, j, &learning_dset, (struct dataset **) 0, (FILE *) 0);
		  if (!status)
		    {
		      if (normalize)
			status = normalize_attributes(learning_dset, (struct dataset *) 0);
		      if (!status)
			{
			  if (dr_method == PDR_SVD)
			    map = svd_transform(learning_dset->x, learning_dset->nv, learning_dset->d, errc);
			  else if ((dr_method == PDR_PCA) || (dr_method == PDR_FISHER))
			    {
			      if (dr_method == PDR_PCA)
				map = pca(learning_dset, errc);
			      else if (dr_method == PDR_FISHER)
				map = fld(learning_dset, errc);
			    }
			  else if (dr_method == PDR_EMAP)
			    {
			      idx = ivec_min(learning_dset->nd, learning_dset->c)-1;
			      map = emap(learning_dset, idr, 1, idx, errc);
			    }
			  if (map)
			    fext[i][j] = map;
			  else
			    fext = (float ****) 0;
			}
		      else
			fext = (float ****) 0;
		      dataset_free(learning_dset);
		    }
		  else
		    fext = (float ****) 0;
		}
	      mx_free((void **) sxc, dset->c);
	      mx_free((void **) lxc, dset->c);
	    }
	}
    }
  return fext;
}

/*
  In case of two classes, add confusion matrix to the model selection file.
*/
static void save_cmx(FILE *fptr, int c, float sens, float spec, float ppv, float npv)
{
  if (c == 2)
    fprintf(fptr, "\t%7.2f\t%7.2f\t%7.2f\t%7.2f", sens, spec, ppv, npv);
  fprintf(fptr, "\n");
}

/*
  Save the criterion function value and parameters for C-SVM.
*/
static int save_C_val(float ecrit, float *fvec, int n, int iteration, int c, 
		      int kernel_type, float sens, float spec, float ppv, float npv)
{
  int   i;
  int   status;
  FILE *fptr;

  status = 0;
  fptr = fopen(PCP_MSL, "a");
  if (fptr)
    {
      fprintf(fptr, "%7.2f\t", ecrit);
      for (i = 0; i < n-1; i++)
	fprintf(fptr, "%12.5g\t", pow(10, fvec[i]));
      fprintf(fptr, "%12.5g\t%d", pow(10, fvec[n-1]), iteration);
      save_cmx(fptr, c, sens, spec, ppv, npv);
      status = fclose(fptr);
    }
  else
    status = -1;
  return status;
}

/*
  Save the criterion function value and parameters for NU-SVM.
*/
static int save_nu_val(float ecrit, float *fvec, int n, int iteration, int c, 
		       int kernel_type, float sens, float spec, float ppv, float npv)
{
  int   status;
  FILE *fptr;

  status = 0;
  fptr = fopen(PCP_MSL, "a");
  if (fptr)
    {
      fprintf(fptr, "%7.2f\t", ecrit);
      fprintf(fptr, "%12.5g\t", fvec[0]); /* nu */
      fprintf(fptr, "%12.5g\t%d", pow(10, fvec[n-1]), iteration); /* gamma */
      save_cmx(fptr, c, sens, spec, ppv, npv);
      status = fclose(fptr);
    }
  else
    status = -1;
  return status;
}

/*
  Save the criterion function value and parameters.
*/
static int save_val(float ecrit, float *fvec, int n, int iteration, int c, 
		    float sens, float spec, float ppv, float npv)
{
  int   i;
  int   status;
  FILE *fptr;

  status = 0;
  fptr = fopen(PCP_MSL, "a");
  if (fptr)
    {
      fprintf(fptr, "%7.2f\t", ecrit);
      for (i = 0; i < n-1; i++)
	fprintf(fptr, "%12.5g\t", fvec[i]);
      fprintf(fptr, "%12.5g\t%d", fvec[n-1], iteration);
      save_cmx(fptr, c, sens, spec, ppv, npv);
      status = fclose(fptr);
    }
  else
    status = -1;
  return status;
}

/*
  Create `xpar_crit_parameters' structure for `method'.
*/
struct xpar_crit_parameters *init_spar(int method, struct dataset *dset, FILE *fdbg)
{
  struct xpar_crit_parameters *xpar_params;
  
  xpar_params = calloc(1, sizeof(struct xpar_crit_parameters));
  if (xpar_params)
    {
      xpar_params->dset = dset;
      xpar_params->outdev = (FILE *) 0;
      xpar_params->fdbg = fdbg;
      xpar_params->vid = 0;
      xpar_params->eval = FLT_MAX;
      xpar_params->method = method;
      xpar_params->nmodels = 1;
    }
  return xpar_params;
}

/*
  Compute value of xpar() criterion function. xpar() is a wrapper
  around xlearn(), which computes cross-validation error rate for a
  given dimension reduction/classifier combination.

  xpar_func() uses rand() to partition input dataset into learning and
  validation subsets for cross-validation. The pseudo-random number
  generator is initialized using srand(parameters->seed) function
  call. Therefore, to reproduce the results for a given dataset, you
  need to give the same parameters->seed value.

  In case of error, set 'errc'. The errors are malloc() and xlearn()
  errors.
*/
float xpar_func(float *fvec, int n, int iteration, void *parameters, int *errc)
{
  int   idx;
  int   method;
  int   dr_method;
  int   idr;
  int   fscrit;
  int   normalize;
  int   nmodels;
  int   status;
  int   vid; 
  int   error_count;
  int   ex;
  int   nexp;
  int   bag_size;
  int   c;
  int   nxval;
  int   i;
  int   t_p;
  int   f_n;
  int   f_p;
  int   t_n;
  int   kernel_type;
  float ecrit;
  float amce;
  float xmce;
  float sens;
  float spec;
  float ppv;
  float npv;
  float nu_max;
  int   *findex;
  int   *ccer;
  int   *tccer;
  int   **sxc;
  int   **lxc;
  float **fext;
  FILE  *outdev;
  FILE  *fdbg;
  struct xpar_crit_parameters *xpar_parameters;
  struct dataset *dset; /* training data set */
  struct dataset *learning_dset;
  struct dataset *validation_dset;
  struct dataset *l_dset; /* learning/validation sets after norm./DR */
  struct dataset *v_dset;
  unsigned int seed;

  status = 0;
  error_count = -1;
  xpar_parameters = (struct xpar_crit_parameters *) parameters;
  method = xpar_parameters->method;
  fdbg = xpar_parameters->fdbg;
  nexp = xpar_parameters->nexp;
  dset = xpar_parameters->dset;
  nxval = xpar_parameters->nxval;
  /*
    NU-SVM parameter optimization is actually constrained optimization
    problem, since nu must be in [0, nu_max] (see get_svm_nu_max() for
    definition of nu_max). Simplex does not support constrained
    optimization.

    For now, do this: if nu violates the constraint, return a large
    value. That should discourage Simplex from going in that
    direction.
  */
  idx = 0; /* idx = 1 means constraint violation */
  if ((method == PALG_SVM) && ((struct svm_parameter *) xpar_parameters->options)->svm_type == NU_SVC)
    {
      nu_max = get_svm_nu_xmax(dset->nd, dset->c, nxval);
      /*
	Check if nu is outside of range.
      */
      if ((fvec[0] < 0) || (fvec[0] > nu_max))
	idx = 1;
    }
  if (idx)
    ecrit = FLT_MAX;
  else
    {
      dr_method = xpar_parameters->dr_method;
      idr = xpar_parameters->idr;
      fscrit = xpar_parameters->fscrit;
      normalize = xpar_parameters->normalize;
      nmodels = xpar_parameters->nmodels;
      vid = xpar_parameters->vid;
      seed = xpar_parameters->seed;
      outdev = xpar_parameters->outdev;
      c = dset->c;
      fdbg = xpar_parameters->fdbg;
      /*
	Compute cross-validation error rates for the given learning
	method and parameters.

	srand() ensures reproducible results. This call guarantees
	that each invocation of xpar_func() with identical input
	arguments will produce identical result.
      */
      srand(seed);
      tccer = calloc(c, sizeof(int)); /* class-conditional error counts for one experiment */
      ecrit = 0.0;
      sens = 0.0;
      spec = 0.0;
      ppv = 0.0;
      npv = 0.0;
      for (ex = 0; (ex < nexp) && !status; ex++)
	{
	  amce = 0.0;
	  status = xpart(c, dset->nd, nxval, &sxc, &lxc);
	  /*
	    lxc[i][j] is number of class i vectors in cross-validation
	    subset j.
	    
	    sxc[i] is list of class i vectors with arranged so that
	    cross-validation subsets are in sequence.  For example, if
	    there are 3 cross-validation subsets with 10 vectors each,
	    sxc[i][0..9] is list of class i vectors in subset 0,
	    sxc[i][10..19] is list of class i vectors in subset 1, and
	    sxc[i][20..29] is list of class i vectors in subset 2.
	  */
	  if (!status)
	    {
	      ivec_set(tccer, c, 0);
	      for (idx = 0; (idx < nxval) && !status; idx++)
		{
		  status = xset(dset, lxc, sxc, idx, &learning_dset, &validation_dset, fdbg);
		  /*
		    NOTE: in November 2002 decided to fix the size of
		    each bagging set to the cardinality of the
		    training data set. Perhaps revisit.
		  */
		  bag_size = learning_dset->nv;
		  if (method == PALG_SVM)
		    {
		      if (((struct svm_parameter *) xpar_parameters->options)->svm_type == C_SVC)
			((struct svm_parameter *) xpar_parameters->options)->C = pow(10, fvec[0]);
		      else
			((struct svm_parameter *) xpar_parameters->options)->nu = fvec[0];
		      ((struct svm_parameter *) xpar_parameters->options)->gamma = pow(10, fvec[1]);
		      ((struct svm_parameter *) xpar_parameters->options)->verbose = 0;
		    }
		  else if (method == PALG_MLP)
		    {
		      ((struct mlp_options *) xpar_parameters->options)->npl[0] = fvec[0];
		      ((struct mlp_options *) xpar_parameters->options)->itmax = fvec[1];
		    }
		  else if (method == PALG_KNN)
		    ((struct knn_options *) xpar_parameters->options)->k = fvec[0];
		  l_dset = (struct dataset *) 0;
		  v_dset = (struct dataset *) 0;
		  /*
		    If feature subsets or feature mappings are already
		    provided, reduce dimensionality of the datasets
		    and signal to xlearn() not to do any feature
		    selection.
		   */
		  if (xpar_parameters->fsel)
		    {
		      if (normalize)
			status = normalize_attributes(learning_dset, validation_dset);
		      if (!status)
			{
			  if (fdbg)
			    log_subset(fdbg, "xpar_func()", learning_dset, ex, idx);
			  findex = xpar_parameters->fsel[ex][idx];
			  l_dset = dataset_select(learning_dset, findex, idr);
			  if (l_dset)
			    {
			      v_dset = dataset_select(validation_dset, findex, idr); 
			      if (v_dset)
				error_count = xlearn(method, PDR_NONE, idr, fscrit, 
						     0, l_dset, v_dset, seed, nmodels,
						     bag_size, xpar_parameters->options,
						     &ccer, outdev, fdbg, errc);
			      else
				status = -1;
			    }
			}
		    }
		  else if (xpar_parameters->fext)
		    {
		      if (normalize)
			status = normalize_attributes(learning_dset, validation_dset);
		      if (!status)
			{
			  fext = xpar_parameters->fext[ex][idx];
			  l_dset = dataset_map(learning_dset, idr, fext);
			  if (l_dset)
			    {
			      v_dset = dataset_map(validation_dset, idr, fext);
			      if (v_dset)
				error_count = xlearn(method, PDR_NONE, idr, fscrit, 
						     0, l_dset, v_dset, seed, nmodels,
						     bag_size, xpar_parameters->options,
						     &ccer, outdev, fdbg, errc);
			      else
				status = -1;
			    }
			}
		    }
		  else
		    error_count = xlearn(method, dr_method, idr, fscrit, normalize, learning_dset,
					 validation_dset, seed, nmodels, bag_size, 
					 xpar_parameters->options, &ccer, outdev, fdbg, errc);
		  if (error_count >= 0)
		    {
		      for (i = 0; i < c; i++)
			tccer[i] += ccer[i];
		      free(ccer);
		      amce += error_count;
		    }
		  else
		    status = -1;
		  dataset_free(validation_dset);
		  dataset_free(learning_dset);
		  dataset_free(v_dset);
		  dataset_free(l_dset);
		}
	      mx_free((void **) sxc, c);
	      mx_free((void **) lxc, c);
	      if (!status)
		{
		  xmce = 100.0*amce/dset->nv; /* error rate for this experiment */
		  viprint_line(4, 1, "Experiment %6d cross-validation error rate:  "
			       "%6.2f%%                       ", ex+1, xmce);
		  ecrit += xmce;
		  if (c == 2)
		    {
		      /*
			NOTE: sensitivity, specificity, positive and
			negative predictive values (SSPN) are defined
			based on a confusion matrix, which in turn is
			defined for a single experiment. Here we have
			multiple experiments, and thus multiple
			confusion matrices (one per experiment). The
			final SSPN are computed as the averages over
			the experiments.
		      */
		      f_p = tccer[1];
		      f_n = tccer[0];
		      t_p = dset->nd[0]-f_n;
		      t_n = dset->nd[1]-f_p;
		      sens += 100.0*t_p/dset->nd[0];
		      spec += 100.0*t_n/dset->nd[1];
		      if (t_p+f_p > 0)
			ppv += 100.0*t_p/(t_p+f_p);
		      if (t_n+f_n > 0)
			npv += 100.0*t_n/(t_n+f_n);
		    }
		}
	    }
	  else
	    *errc = errno;
	}
      free(tccer);
      if (!status)
	{
	  ecrit = ecrit/nexp;
	  if (c == 2)
	    {
	      sens = sens/nexp;
	      spec = spec/nexp;
	      ppv = ppv/nexp;
	      npv = npv/nexp;
	    }
	  if (ecrit < xpar_parameters->eval)
	    {
	      xpar_parameters->eval = ecrit;
	      xpar_parameters->x1 = fvec[0];
	      xpar_parameters->x2 = fvec[1];
	    }
	  if (method == PALG_SVM)
	    {
	      kernel_type = ((struct svm_parameter *) xpar_parameters->options)->kernel_type;
	      if (((struct svm_parameter *) xpar_parameters->options)->svm_type == C_SVC)
		status = save_C_val(ecrit, fvec, n, iteration, c, kernel_type, 
				    sens, spec, ppv, npv);
	      else
		status = save_nu_val(ecrit, fvec, n, iteration, c, kernel_type, 
				     sens, spec, ppv, npv);
	    }
	  else
	    status = save_val(ecrit, fvec, n, iteration, c, sens, spec, ppv, npv);
	  if (status == -1)
	    *errc = errno;
	  if (iteration == 0)
	    viprint_line(6, 1, "Iteration %7d; vertex %7d; %s %7.2f%%", iteration, vid,
			 XPAR_FUNC_MSG, ecrit);
	  else
	    viprint_line(5, 1, "Iteration %7d; %s %7.2f%%", iteration, XPAR_FUNC_MSG, ecrit);
	  /*inverse_video();
	    fprintf(outdev, "\n");*/
	}
      else
	ecrit = -1.0;
    }
  return ecrit;
}

int xtest_optimal_malloc(float **tds_x, int tds_nv, int d, float **teds_x, 
			 int teds_nv, float ***x, float ***y, float **xmean,
			 float **std)
{
  int   status;
  float *l_xmean;
  float *l_std;
  float **l_x;
  float **l_y;

  status = -1;
  l_xmean = fmx_mean(tds_x, tds_nv, d);
  if (l_xmean)
    {
      l_std = fmx_std(tds_x, tds_nv, d);
      if (l_std)
	{
	  l_x = fmx_clone(tds_x, tds_nv, d);
	  if (l_x)
	    {
	      l_y = fmx_clone(teds_x, teds_nv, d);
	      if (l_y)
		{
		  status = 0;
		  *x = l_x;
		  *y = l_y;
		  *xmean = l_xmean;
		  *std = l_std;
		}
	    }
	}
    }
  return status;
}

/*
  Apply optimal parameters to test data set, display and save results.

  In case of success, set *errc to 0, otherwise set it to the error
  code.
*/
void xtest_optimal(struct xpar_crit_parameters *xpar_parameters, int *errc, char **xname)
{
  int   status;
  int   i;
  int   d;
  int   idr;
  int   predicted_class;
  int   method;
  int   *nd;
  float *xmean;
  float *std;
  float *output;
  void  *model;
  float **target;
  struct dataset *n_tds;
  struct dataset *n_teds;
  struct dataset *r_tds;
  struct dataset *r_teds;
  struct svm_node *svm_vector = (struct svm_node *) 0;
  struct svm_problem *problem;
  struct mlp_options *mlp_options;
  struct knn_options *knn_options;
  struct svm_parameter *svm_options;
  FILE   *rcl_fptr;

  status = 0;
  model = (void *) 0;
  predicted_class = 0;
  d = tds->d;
  method = xpar_parameters->method;
  /*
    Apply normalization and/or dimensionality reduction.
  */
  if (xpar_parameters->normalize)
    {
      nd = ivec_clone(tds->nd, tds->c);
      n_tds = dataset_lt(d, tds->c, nd, tds->nv, (char **) 0, (float **) 0);
      nd = ivec_clone(teds->nd, teds->c);
      n_teds = dataset_lt(d, teds->c, nd, teds->nv, (char **) 0, (float **) 0);
      status = xtest_optimal_malloc(tds->x, tds->nv, d, teds->x, teds->nv, &n_tds->x, &n_teds->x, &xmean, &std);
      if (!status)
	{
	  fmx_prenorm(n_tds->x, tds->nv, d, xmean, std);
	  /*
	    Test data set is normalized using mean/std. from training
	    data set.
	  */
	  fmx_prenorm(n_teds->x, teds->nv, d, xmean, std);
	}
      else
	*errc = errno;
    }
  else
    {
      n_tds = tds;
      n_teds = teds;
    }
  if (!status)
    {
      if (xpar_parameters->dr_method != PDR_NONE)
	{
	  idr = xpar_parameters->idr;
	  status = reduce_d(n_tds, n_teds, &r_tds, &r_teds, xpar_parameters->dr_method, 
			    idr, xpar_parameters->fscrit, errc);
	  if (xpar_parameters->normalize)
	    {
	      dataset_free(n_tds);
	      dataset_free(n_teds);
	    }
	}
      else
	{
	  idr = d;
	  r_tds = n_tds;
	  r_teds = n_teds;
	}
      if (!status)
	{
	  if (method == PALG_SVM)
	    {
	      /*
		Build the model, apply to the test data set.
	      */
	      problem = create_problem(r_tds);
	      /*
		Build probability model.
	      */
	      svm_options = (struct svm_parameter *) xpar_parameters->options;
	      svm_options->probability = 1;
	      model = svm_train(problem, svm_options);
	    }
	  else if (method == PALG_MLP)
	    {
	      target = mlp_target(r_tds->c, r_tds->nd);
	      mlp_options = (struct mlp_options *) xpar_parameters->options;
	      inverse_video();
	      model = mlp_learn(mlp_options->opt_method, r_tds->x, r_tds->nv,
				r_tds->nd, idr, target, mlp_options->nlayers,
				mlp_options->npl, mlp_options->itmax, 
				mlp_options->range, mlp_options->eta, 
				mlp_options->mu, stdout, 0, (char *) 0, xpar_parameters->seed,
				errc, xpar_parameters->fdbg);
	    }
	  if (model || (method == PALG_KNN))
	    {
	      if (!teds->prediction)
		teds->prediction = malloc(teds->nv*sizeof(int));
	      rcl_fptr = fopen(PCP_RCL, "w");
	      if (rcl_fptr)
		{
		  /*
		    For now, only SVM computes prediction strengths.
		   */
		  if (method == PALG_SVM)
		    teds->ps = dmx_alloc(teds->nv, teds->c);
		  for (i = 0; (i < teds->nv) && !status; i++)
		    {
		      if (method == PALG_SVM)
			{
			  svm_vector = create_svm_vector(r_teds->x[i], idr);
			  predicted_class = svm_predict_probability(model, svm_vector, 
								    teds->ps[i])-1;
			  predicted_class = svm_predict((struct svm_model *) model, svm_vector)-1;
			  free(svm_vector);
			}
		      else if (method == PALG_MLP)
			{
			  output = mlp_output(model, r_teds->x[i]);
			  predicted_class = fvec_argmax(output, r_teds->c);
			}
		      else if (method == PALG_KNN)
			{
			  knn_options = (struct knn_options *) xpar_parameters->options;
			  predicted_class = knn(r_teds->x[i], 0, r_tds, xpar_parameters->x1,
						knn_options->dist, errc, xpar_parameters->fdbg);
			  if (predicted_class == -1)
			    status = -1;
			}
		      if (!status)
			{
			  teds->prediction[i] = predicted_class;
			  write_rcl(rcl_fptr, i, teds, tds);
			}
		    }
		  if (!status)
		    {
		      status = fclose(rcl_fptr);
		      if (!status)
			{
			  printf("\n");
			  predict_disp(teds, 0, method);
			  if (method == PALG_SVM)
			    svm_destroy_model((struct svm_model *) model);
			  pwait();
			}
		    }
		}
	      else
		{
		  *errc = errno;
		  status = -1;
		  if (xname)
		    *xname = strdup(PCP_RCL);
		}
	    }
	}
      if (xpar_parameters->dr_method != PDR_NONE)
	{
	  dataset_free(r_tds);
	  dataset_free(r_teds);
	}
    }
}

/*
  Compute optimal feature transformations/subsets for all experiments
  and all cross-validation subsets. We do it once at the beggining of
  the simplex/grid search, and subsequently just reuse the results.
*/
int compute_dr(struct xpar_crit_parameters *xpar_parameters, int dr_method, int idr, 
	       int fscrit, FILE *fdbg, int *errc)
{
  int status;

  status = 0;
  if ((dr_method == PDR_SVD) || (dr_method == PDR_FISHER) || (dr_method == PDR_PCA) ||
      (dr_method == PDR_EMAP))
    {
      xpar_parameters->fext = x_fext(tds, xpar_parameters->nexp, xpar_parameters->nxval,
				     xpar_parameters->normalize, dr_method,
				     idr, errc);
      if (!xpar_parameters->fext)
	status = -1;
    }
  else
    {
      xpar_parameters->fsel = x_fsel(tds, xpar_parameters->nexp, xpar_parameters->nxval,
				     xpar_parameters->normalize, dr_method,
				     idr, fscrit, PCP_XSF, fdbg, errc);
      if (!xpar_parameters->fsel)
	status = -1;
    }
  return status;
}

/*
  Input model selection parameters common to all algorithms.
*/
void input_xpar(struct dataset *dset, struct xpar_crit_parameters *xpar_parameters)
{
  int  max_nxval;
  int  ex;
  int  dflt;
  int  i;
  int  dr_method;
  int  idr;
  int  fscrit;
  char *msg;

  max_nxval = ivec_min(dset->nd, dset->c);
  xpar_parameters->nxval = input_nxval(stdin, stdout, max_nxval);
  
  msg = malloc((PCP_QLEN+1)*sizeof(char));
  xpar_parameters->nexp = input_nexp(msg);
  free(msg);
  
  xpar_parameters->seed = input_seed(stdin, stdout);
  srand(xpar_parameters->seed);

  dflt = 0;
  ex = 1;
  xpar_parameters->normalize = input_integer(stdin, stdout, PCP_UMSG_RAW, 
					     PCP_QLEN, &dflt, &dflt, &ex);
  
  ex = 0;
  for (i = 0; i < dset->c; i++)
    ex += (dset->nd[i]-1)/xpar_parameters->nxval+1;
  dr_method = input_dr(stdout, dset->nv-ex, dset->d, dset->c, &idr, &fscrit);
  xpar_parameters->dr_method = dr_method;
  xpar_parameters->idr = idr;
  xpar_parameters->fscrit = fscrit;
}

/*
  Read MLP-specific model selection parameters.
*/
void input_xpar_mlp(int *nhl, int *nhh, int *hstep, int *nitl, int *nith, int *itstep)
{
  int  min_range;
  int  dflt;
  char *msg;

  msg = malloc((PCP_QLEN+1)*sizeof(char));

  min_range = 1;
  dflt = PCP_MLP_DFLT_NHL;
  sprintf(msg, PCP_UMSG_MLP_MSEL1, dflt);
  *nhl = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, (int *) 0);

  min_range = *nhl;
  dflt = 2*(*nhl);
  sprintf(msg, PCP_UMSG_MLP_MSEL2, min_range, dflt);
  *nhh = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, (int *) 0);

  if (*nhh > *nhl)
    {
      min_range = 1;
      dflt = (*nhh-*nhl)/PCP_MLP_DFLT_HSTP;
      if (dflt == 0)
	dflt = 1;
      sprintf(msg, PCP_UMSG_MLP_MSEL3, min_range, dflt);
      *hstep = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, (int *) 0);
    }
  else
    *hstep = 1;

  if (nitl)
    {
      min_range = 1;
      dflt = PCP_MLP_DFLT_MIN;
      sprintf(msg, PCP_UMSG_MLP_MSEL4, min_range, dflt);
      *nitl = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, (int *) 0);
      
      min_range = *nitl;
      dflt = 2*min_range;
      sprintf(msg, PCP_UMSG_MLP_MSEL5, min_range, dflt);
      *nith = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, (int *) 0);
      
      if (*nith > *nitl)
	{
	  min_range = 1;
	  dflt = (*nith-*nitl)/PCP_MLP_DFLT_ISTP;
	  if (dflt == 0)
	    dflt = 1;
	  sprintf(msg, PCP_UMSG_MLP_MSEL3, min_range, dflt);
	  *itstep = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, (int *) 0);
	}
      else
	*itstep = 1;
    }
  free(msg);
}

/*
  Read k-NN-specific model selection parameters.
*/
void input_xpar_knn(struct dataset *dset, int nxval, int *kmin, int *kmax, int *kstep)
{
  int  i;
  int  min_range;
  int  max_range;
  int  mx;
  int  kval;
  int  dflt;
  char *msg;

  mx = 0;
  msg = malloc((PCP_QLEN+1)*sizeof(char));
  for (i = 0; i < dset->c; i++)
    {
      kval = dset->nd[i]-dset->nd[i]/nxval;
      if ((i == 0) || (kval < mx))
	mx = kval;
    }

  min_range = 1;
  max_range = mx;
  dflt = PCP_KNN_DFLT_KMIN;
  sprintf(msg, PCP_UMSG_KNN_MSEL1, max_range, dflt);
  *kmin = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, &max_range);

  min_range = *kmin;
  max_range = mx;
  dflt = PCP_KNN_DFLT_KMAX;
  if (dflt > max_range)
    dflt = max_range;
  sprintf(msg, PCP_UMSG_KNN_MSEL2, min_range, max_range, dflt);
  *kmax = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, &max_range);

  min_range = 1;
  dflt = PCP_KNN_DFLT_KSTEP;
  sprintf(msg, PCP_UMSG_KNN_MSEL3, dflt);
  *kstep = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range, (int *) 0);
  
  free(msg);
}


syntax highlighted by Code2HTML, v. 0.9.1