/* File name: xpar.c Created by: Ljubomir Buturovic Created: 02/11/2005 Purpose: find optimal parameters minimizing cross-validation error of a classifier. */ /* Copyright 2004 Ljubomir J. Buturovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include "xpar.h" #include "lau.h" #include "lmat.h" #include "pau.h" #include "pcp.h" #include "simplex.h" #include "fselect.h" #include "xlearn.h" #include "pcl_svm.h" #include "svm.h" #include "cda.h" #include "emap.h" #include "mlp.h" #include "knn.h" static char rcsid[] = "$Id: xpar.c,v 1.78 2006/05/24 05:04:19 ljubomir Exp $"; /* Save optimal feature subsets computed by x_fsel(). Return -1 and set errno in case of file error. 
*/ static int save_fsel(int ***fsel, struct dataset *dset, int idr, int nexp, int nxval) { int i; int j; int k; int idx; int status; FILE *fptr; status = 0; fptr = fopen(PCP_XSL, "w"); if (fptr) { for (i = 0; i < idr; i++) { for (j = 0; j < nexp; j++) for (k = 0; k < nxval; k++) fprintf(fptr, "%d\t", fsel[j][k][i]+1); if (dset->alab) { for (j = 0; j < nexp; j++) for (k = 0; k < nxval; k++) { idx = fsel[j][k][i]; fprintf(fptr, "%s\t", dset->alab[idx]); } } fprintf(fptr, "\n"); } status = fclose(fptr); } else status = -1; return status; } int ***fsel_free(int ***fsel, int nexp, int nxval) { int i; int j; if (fsel) { for (i = 0; i < nexp; i++) { for (j = 0; j < nxval; j++) free(fsel[i][j]); free(fsel[i]); } free(fsel); } return (int ***) 0; } void xpar_free(struct xpar_crit_parameters *xpar_parameters) { free(xpar_parameters->options); fsel_free(xpar_parameters->fsel, xpar_parameters->nexp, xpar_parameters->nxval); free(xpar_parameters); } /* Log cross-validation subset generated in experiment `ex', cross-validation subset `xval_idx'. The subsets generated by x_fsel() and xpar_func() must be identical. */ void log_subset(FILE *fdbg, char *func, struct dataset *learning_dset, int ex, int xval_idx) { int jdx; int ndx; jdx = learning_dset->d; ndx = learning_dset->nv; fprintf(fdbg, "%s; experiment: %d; xval: %d; learning_dset->x[0][0]: %12.5g; learning_dset->x[%d][%d]: %12.5g\n", func, ex, xval_idx, learning_dset->x[0][0], ndx-1, jdx-1, learning_dset->x[ndx-1][jdx-1]); fflush(fdbg); } /* Compute optimal feature subsets in `dset' for `nexp' experiments and `nxval' cross-validation subsets. The feature selection chooses optimal `idr' features using `dr_method' and `fscrit' criterion. If `normalize' is 1, the data is optionally normalized first. This function is used to improve (significantly) performance of Model Selection functionality. Model Selection repeats cross-validation estimation for various values of SVM classifier parameters. 
This process optionally includes (a very time consuming) dimensionality reduction for each experiment and each cross-validation subset. However, there is no need to execute this repeatedly for different SVM parameter values, since the inputs are identical, and therefore the results will be identical. It is sufficient to compute the chosen feature subsets once, and then reuse them for subsequent cross-validation experiments. That is the job of x_fsel(). Optionally write intermediate results in `fname'. The function returns array `fsel'. It contains selected feature subsets for all `nexp' experiments and `nxval' cross-validation subsets. For example, fsel[2][3] is the list of features selected in experiment 2, cross-validation subset 3. In case of failure, return NULL and set `errc'. */ static int ***x_fsel(struct dataset *dset, int nexp, int nxval, int normalize, int dr_method, int idr, int fscrit, char *fname, FILE *fdbg, int *errc) { int status; int i; int j; int k; int idx; int *findex; int *vfx; int **sxc; int **lxc; int ***fsel; FILE *fptr; struct dataset *learning_dset; struct dataset *uset; status = 0; fptr = (FILE *) 0; fsel = malloc(nexp*sizeof(int **)); if (fsel) { for (i = 0; (i < nexp) && !status; i++) { fsel[i] = malloc(nxval*sizeof(int *)); if (!fsel[i]) { status = -1; *errc = errno; } } if (!status && fname) { fptr = fopen(fname, "w"); if (!fptr) { status = -1; *errc = errno; } } for (i = 0; (i < nexp) && !status; i++) { status = xpart(dset->c, dset->nd, nxval, &sxc, &lxc); if (!status) { for (j = 0; (j < nxval) && !status; j++) { status = xset(dset, lxc, sxc, j, &learning_dset, (struct dataset **) 0, (FILE *) 0); if (!status) { if (normalize) status = normalize_attributes(learning_dset, (struct dataset *) 0); if (!status) { if (fdbg) log_subset(fdbg, "x_fsel()", learning_dset, i, j); uset = select_subset(learning_dset, idr, dr_method, fscrit, &findex, 0, errc); dataset_free(uset); if (*errc == 0) { vfx = ivec_clone(findex, idr); free(findex); 
fsel[i][j] = vfx; if (fname) { for (k = 0; k < idr; k++) { idx = vfx[k]; if (dset->alab) fprintf(fptr, "%s\t", dset->alab[idx]); else fprintf(fptr, "%d\t", idx+1); } fprintf(fptr, "\n"); fflush(fptr); } } else fsel = (int ***) 0; } else fsel = (int ***) 0; dataset_free(learning_dset); } else fsel = (int ***) 0; } mx_free((void **) sxc, dset->c); mx_free((void **) lxc, dset->c); } } if (fname) { status = fclose(fptr); if (status != 0) { *errc = errno; fsel = fsel_free(fsel, nexp, nxval); } } } else { status = -1; *errc = errno; } if (!status) { status = save_fsel(fsel, dset, idr, nexp, nxval); if (status == -1) { *errc = errno; fsel = fsel_free(fsel, nexp, nxval); } } return fsel; } /* Feature extraction equivalent of x_fsel(): compute optimal feature mappings of `dset' for `nexp' experiments and `nxval' cross-validation subsets. The function returns array `fext'. It contains optimal feature mappings (i.e., transformation matrices) for all `nexp' experiments and `nxval' cross-validation subsets. For example, fext[2][3] is the feature transformation matrix selected in experiment 2, cross-validation subset 3. In case of failure, return NULL and set `errc'. 
*/
static float ****x_fext(struct dataset *dset, int nexp, int nxval,
                        int normalize, int dr_method, int idr, int *errc)
{
  int status;
  int i;
  int j;
  int idx;
  int **sxc;
  int **lxc;
  float **map;
  float ****fext;
  struct dataset *learning_dset;

  status = 0;

  /* Zero-initialized storage, so that the row arrays can be released
     safely on any failure path. */
  fext = calloc(nexp, sizeof(float ***));
  if (!fext)
    {
      *errc = errno;
      return (float ****) 0;
    }

  for (i = 0; (i < nexp) && !status; i++)
    {
      fext[i] = calloc(nxval, sizeof(float **));
      if (!fext[i])
        {
          status = -1;
          *errc = errno;
        }
    }

  for (i = 0; (i < nexp) && !status; i++)
    {
      status = xpart(dset->c, dset->nd, nxval, &sxc, &lxc);
      if (status)
        {
          /* Same reporting as the xpart() failure path in xpar_func(). */
          *errc = errno;
          break;
        }

      for (j = 0; (j < nxval) && !status; j++)
        {
          status = xset(dset, lxc, sxc, j, &learning_dset,
                        (struct dataset **) 0, (FILE *) 0);
          if (status)
            break;

          if (normalize)
            status = normalize_attributes(learning_dset,
                                          (struct dataset *) 0);
          if (!status)
            {
              /* Start from a null mapping on every subset, so that an
                 unsupported `dr_method' cannot reuse an earlier result. */
              map = (float **) 0;
              if (dr_method == PDR_SVD)
                map = svd_transform(learning_dset->x, learning_dset->nv,
                                    learning_dset->d, errc);
              else if (dr_method == PDR_PCA)
                map = pca(learning_dset, errc);
              else if (dr_method == PDR_FISHER)
                map = fld(learning_dset, errc);
              else if (dr_method == PDR_EMAP)
                {
                  idx = ivec_min(learning_dset->nd, learning_dset->c)-1;
                  map = emap(learning_dset, idr, 1, idx, errc);
                }

              if (map)
                fext[i][j] = map;
              else
                status = -1;
            }
          dataset_free(learning_dset);
        }

      mx_free((void **) sxc, dset->c);
      mx_free((void **) lxc, dset->c);
    }

  if (status)
    {
      /* NOTE(review): mappings already stored in the table are not
         released here, because their dimensions are not visible in this
         function; only the row arrays and the table itself are freed. */
      for (i = 0; i < nexp; i++)
        free(fext[i]);
      free(fext);
      fext = (float ****) 0;
    }

  return fext;
}

/*
  In case of two classes, add confusion matrix statistics (`sens', `spec',
  `ppv', `npv') to the current line of the model selection file.  The line
  is terminated in all cases.
*/
static void save_cmx(FILE *fptr, int c, float sens, float spec, float ppv,
                     float npv)
{
  if (c == 2)
    fprintf(fptr, "\t%7.2f\t%7.2f\t%7.2f\t%7.2f", sens, spec, ppv, npv);
  fprintf(fptr, "\n");
}

/*
  Save the criterion function value and parameters for C-SVM.
*/ static int save_C_val(float ecrit, float *fvec, int n, int iteration, int c, int kernel_type, float sens, float spec, float ppv, float npv) { int i; int status; FILE *fptr; status = 0; fptr = fopen(PCP_MSL, "a"); if (fptr) { fprintf(fptr, "%7.2f\t", ecrit); for (i = 0; i < n-1; i++) fprintf(fptr, "%12.5g\t", pow(10, fvec[i])); fprintf(fptr, "%12.5g\t%d", pow(10, fvec[n-1]), iteration); save_cmx(fptr, c, sens, spec, ppv, npv); status = fclose(fptr); } else status = -1; return status; } /* Save the criterion function value and parameters for NU-SVM. */ static int save_nu_val(float ecrit, float *fvec, int n, int iteration, int c, int kernel_type, float sens, float spec, float ppv, float npv) { int status; FILE *fptr; status = 0; fptr = fopen(PCP_MSL, "a"); if (fptr) { fprintf(fptr, "%7.2f\t", ecrit); fprintf(fptr, "%12.5g\t", fvec[0]); /* nu */ fprintf(fptr, "%12.5g\t%d", pow(10, fvec[n-1]), iteration); /* gamma */ save_cmx(fptr, c, sens, spec, ppv, npv); status = fclose(fptr); } else status = -1; return status; } /* Save the criterion function value and parameters. */ static int save_val(float ecrit, float *fvec, int n, int iteration, int c, float sens, float spec, float ppv, float npv) { int i; int status; FILE *fptr; status = 0; fptr = fopen(PCP_MSL, "a"); if (fptr) { fprintf(fptr, "%7.2f\t", ecrit); for (i = 0; i < n-1; i++) fprintf(fptr, "%12.5g\t", fvec[i]); fprintf(fptr, "%12.5g\t%d", fvec[n-1], iteration); save_cmx(fptr, c, sens, spec, ppv, npv); status = fclose(fptr); } else status = -1; return status; } /* Create `xpar_crit_parameters' structure for `method'. 
*/ struct xpar_crit_parameters *init_spar(int method, struct dataset *dset, FILE *fdbg) { struct xpar_crit_parameters *xpar_params; xpar_params = calloc(1, sizeof(struct xpar_crit_parameters)); if (xpar_params) { xpar_params->dset = dset; xpar_params->outdev = (FILE *) 0; xpar_params->fdbg = fdbg; xpar_params->vid = 0; xpar_params->eval = FLT_MAX; xpar_params->method = method; xpar_params->nmodels = 1; } return xpar_params; } /* Compute value of xpar() criterion function. xpar() is a wrapper around xlearn(), which computes cross-validation error rate for a given dimension reduction/classifier combination. xpar_func() uses rand() to partition input dataset into learning and validation subsets for cross-validation. The pseudo-random number generator is initialized using srand(parameters->seed) function call. Therefore, to reproduce the results for a given dataset, you need to give the same parameters->seed value. In case of error, set 'errc'. The errors are malloc() and xlearn() errors. */ float xpar_func(float *fvec, int n, int iteration, void *parameters, int *errc) { int idx; int method; int dr_method; int idr; int fscrit; int normalize; int nmodels; int status; int vid; int error_count; int ex; int nexp; int bag_size; int c; int nxval; int i; int t_p; int f_n; int f_p; int t_n; int kernel_type; float ecrit; float amce; float xmce; float sens; float spec; float ppv; float npv; float nu_max; int *findex; int *ccer; int *tccer; int **sxc; int **lxc; float **fext; FILE *outdev; FILE *fdbg; struct xpar_crit_parameters *xpar_parameters; struct dataset *dset; /* training data set */ struct dataset *learning_dset; struct dataset *validation_dset; struct dataset *l_dset; /* learning/validation sets after norm./DR */ struct dataset *v_dset; unsigned int seed; status = 0; error_count = -1; xpar_parameters = (struct xpar_crit_parameters *) parameters; method = xpar_parameters->method; fdbg = xpar_parameters->fdbg; nexp = xpar_parameters->nexp; dset = xpar_parameters->dset; 
nxval = xpar_parameters->nxval; /* NU-SVM parameter optimization is actually constrained optimization problem, since nu must be in [0, nu_max] (see get_svm_nu_max() for definition of nu_max). Simplex does not support constrained optimization. For now, do this: if nu violates the constraint, return a large value. That should discourage Simplex from going in that direction. */ idx = 0; /* idx = 1 means constraint violation */ if ((method == PALG_SVM) && ((struct svm_parameter *) xpar_parameters->options)->svm_type == NU_SVC) { nu_max = get_svm_nu_xmax(dset->nd, dset->c, nxval); /* Check if nu is outside of range. */ if ((fvec[0] < 0) || (fvec[0] > nu_max)) idx = 1; } if (idx) ecrit = FLT_MAX; else { dr_method = xpar_parameters->dr_method; idr = xpar_parameters->idr; fscrit = xpar_parameters->fscrit; normalize = xpar_parameters->normalize; nmodels = xpar_parameters->nmodels; vid = xpar_parameters->vid; seed = xpar_parameters->seed; outdev = xpar_parameters->outdev; c = dset->c; fdbg = xpar_parameters->fdbg; /* Compute cross-validation error rates for the given learning method and parameters. srand() ensures reproducible results. This call guarantees that each invocation of xpar_func() with identical input arguments will produce identical result. */ srand(seed); tccer = calloc(c, sizeof(int)); /* class-conditional error counts for one experiment */ ecrit = 0.0; sens = 0.0; spec = 0.0; ppv = 0.0; npv = 0.0; for (ex = 0; (ex < nexp) && !status; ex++) { amce = 0.0; status = xpart(c, dset->nd, nxval, &sxc, &lxc); /* lxc[i][j] is number of class i vectors in cross-validation subset j. sxc[i] is list of class i vectors with arranged so that cross-validation subsets are in sequence. For example, if there are 3 cross-validation subsets with 10 vectors each, sxc[i][0..9] is list of class i vectors in subset 0, sxc[i][10..19] is list of class i vectors in subset 1, and sxc[i][20..29] is list of class i vectors in subset 2. 
*/ if (!status) { ivec_set(tccer, c, 0); for (idx = 0; (idx < nxval) && !status; idx++) { status = xset(dset, lxc, sxc, idx, &learning_dset, &validation_dset, fdbg); /* NOTE: in November 2002 decided to fix the size of each bagging set to the cardinality of the training data set. Perhaps revisit. */ bag_size = learning_dset->nv; if (method == PALG_SVM) { if (((struct svm_parameter *) xpar_parameters->options)->svm_type == C_SVC) ((struct svm_parameter *) xpar_parameters->options)->C = pow(10, fvec[0]); else ((struct svm_parameter *) xpar_parameters->options)->nu = fvec[0]; ((struct svm_parameter *) xpar_parameters->options)->gamma = pow(10, fvec[1]); ((struct svm_parameter *) xpar_parameters->options)->verbose = 0; } else if (method == PALG_MLP) { ((struct mlp_options *) xpar_parameters->options)->npl[0] = fvec[0]; ((struct mlp_options *) xpar_parameters->options)->itmax = fvec[1]; } else if (method == PALG_KNN) ((struct knn_options *) xpar_parameters->options)->k = fvec[0]; l_dset = (struct dataset *) 0; v_dset = (struct dataset *) 0; /* If feature subsets or feature mappings are already provided, reduce dimensionality of the datasets and signal to xlearn() not to do any feature selection. 
*/ if (xpar_parameters->fsel) { if (normalize) status = normalize_attributes(learning_dset, validation_dset); if (!status) { if (fdbg) log_subset(fdbg, "xpar_func()", learning_dset, ex, idx); findex = xpar_parameters->fsel[ex][idx]; l_dset = dataset_select(learning_dset, findex, idr); if (l_dset) { v_dset = dataset_select(validation_dset, findex, idr); if (v_dset) error_count = xlearn(method, PDR_NONE, idr, fscrit, 0, l_dset, v_dset, seed, nmodels, bag_size, xpar_parameters->options, &ccer, outdev, fdbg, errc); else status = -1; } } } else if (xpar_parameters->fext) { if (normalize) status = normalize_attributes(learning_dset, validation_dset); if (!status) { fext = xpar_parameters->fext[ex][idx]; l_dset = dataset_map(learning_dset, idr, fext); if (l_dset) { v_dset = dataset_map(validation_dset, idr, fext); if (v_dset) error_count = xlearn(method, PDR_NONE, idr, fscrit, 0, l_dset, v_dset, seed, nmodels, bag_size, xpar_parameters->options, &ccer, outdev, fdbg, errc); else status = -1; } } } else error_count = xlearn(method, dr_method, idr, fscrit, normalize, learning_dset, validation_dset, seed, nmodels, bag_size, xpar_parameters->options, &ccer, outdev, fdbg, errc); if (error_count >= 0) { for (i = 0; i < c; i++) tccer[i] += ccer[i]; free(ccer); amce += error_count; } else status = -1; dataset_free(validation_dset); dataset_free(learning_dset); dataset_free(v_dset); dataset_free(l_dset); } mx_free((void **) sxc, c); mx_free((void **) lxc, c); if (!status) { xmce = 100.0*amce/dset->nv; /* error rate for this experiment */ viprint_line(4, 1, "Experiment %6d cross-validation error rate: " "%6.2f%% ", ex+1, xmce); ecrit += xmce; if (c == 2) { /* NOTE: sensitivity, specificity, positive and negative predictive values (SSPN) are defined based on a confusion matrix, which in turn is defined for a single experiment. Here we have multiple experiments, and thus multiple confusion matrices (one per experiment). The final SSPN are computed as the averages over the experiments. 
*/ f_p = tccer[1]; f_n = tccer[0]; t_p = dset->nd[0]-f_n; t_n = dset->nd[1]-f_p; sens += 100.0*t_p/dset->nd[0]; spec += 100.0*t_n/dset->nd[1]; if (t_p+f_p > 0) ppv += 100.0*t_p/(t_p+f_p); if (t_n+f_n > 0) npv += 100.0*t_n/(t_n+f_n); } } } else *errc = errno; } free(tccer); if (!status) { ecrit = ecrit/nexp; if (c == 2) { sens = sens/nexp; spec = spec/nexp; ppv = ppv/nexp; npv = npv/nexp; } if (ecrit < xpar_parameters->eval) { xpar_parameters->eval = ecrit; xpar_parameters->x1 = fvec[0]; xpar_parameters->x2 = fvec[1]; } if (method == PALG_SVM) { kernel_type = ((struct svm_parameter *) xpar_parameters->options)->kernel_type; if (((struct svm_parameter *) xpar_parameters->options)->svm_type == C_SVC) status = save_C_val(ecrit, fvec, n, iteration, c, kernel_type, sens, spec, ppv, npv); else status = save_nu_val(ecrit, fvec, n, iteration, c, kernel_type, sens, spec, ppv, npv); } else status = save_val(ecrit, fvec, n, iteration, c, sens, spec, ppv, npv); if (status == -1) *errc = errno; if (iteration == 0) viprint_line(6, 1, "Iteration %7d; vertex %7d; %s %7.2f%%", iteration, vid, XPAR_FUNC_MSG, ecrit); else viprint_line(5, 1, "Iteration %7d; %s %7.2f%%", iteration, XPAR_FUNC_MSG, ecrit); /*inverse_video(); fprintf(outdev, "\n");*/ } else ecrit = -1.0; } return ecrit; } int xtest_optimal_malloc(float **tds_x, int tds_nv, int d, float **teds_x, int teds_nv, float ***x, float ***y, float **xmean, float **std) { int status; float *l_xmean; float *l_std; float **l_x; float **l_y; status = -1; l_xmean = fmx_mean(tds_x, tds_nv, d); if (l_xmean) { l_std = fmx_std(tds_x, tds_nv, d); if (l_std) { l_x = fmx_clone(tds_x, tds_nv, d); if (l_x) { l_y = fmx_clone(teds_x, teds_nv, d); if (l_y) { status = 0; *x = l_x; *y = l_y; *xmean = l_xmean; *std = l_std; } } } } return status; } /* Apply optimal parameters to test data set, display and save results. In case of success, set *errc to 0, otherwise set it to the error code. 
*/
/*
  The training and test data sets are the file-external `tds' and `teds'.
  Predictions are stored in teds->prediction and written to PCP_RCL. If
  that file cannot be opened and `xname' is not null, *xname receives a
  copy of the file name.

  NOTE(review): the SVM problem, the MLP target matrix and model, and the
  normalization vectors are not released here; their deallocators are not
  visible in this file.
*/
void xtest_optimal(struct xpar_crit_parameters *xpar_parameters, int *errc,
                   char **xname)
{
  int status;
  int i;
  int d;
  int idr;
  int predicted_class;
  int method;
  int *nd;
  float *xmean;
  float *std;
  float *output;
  void *model;
  float **target;
  struct dataset *n_tds;
  struct dataset *n_teds;
  struct dataset *r_tds;
  struct dataset *r_teds;
  struct svm_node *svm_vector = (struct svm_node *) 0;
  struct svm_problem *problem;
  struct mlp_options *mlp_options;
  struct knn_options *knn_options;
  struct svm_parameter *svm_options;
  FILE *rcl_fptr;

  status = 0;
  *errc = 0;
  model = (void *) 0;
  predicted_class = 0;
  /* Null until reduce_d() fills them in, so that the cleanup at the end is
     safe even if it fails. */
  r_tds = (struct dataset *) 0;
  r_teds = (struct dataset *) 0;
  d = tds->d;
  method = xpar_parameters->method;

  /* Apply normalization and/or dimensionality reduction. */
  if (xpar_parameters->normalize)
    {
      nd = ivec_clone(tds->nd, tds->c);
      n_tds = dataset_lt(d, tds->c, nd, tds->nv, (char **) 0, (float **) 0);
      nd = ivec_clone(teds->nd, teds->c);
      n_teds = dataset_lt(d, teds->c, nd, teds->nv, (char **) 0,
                          (float **) 0);
      if (n_tds && n_teds)
        status = xtest_optimal_malloc(tds->x, tds->nv, d, teds->x, teds->nv,
                                      &n_tds->x, &n_teds->x, &xmean, &std);
      else
        status = -1;

      if (!status)
        {
          fmx_prenorm(n_tds->x, tds->nv, d, xmean, std);
          /* Test data set is normalized using mean/std. from training
             data set. */
          fmx_prenorm(n_teds->x, teds->nv, d, xmean, std);
        }
      else
        *errc = errno;
    }
  else
    {
      n_tds = tds;
      n_teds = teds;
    }

  if (!status)
    {
      if (xpar_parameters->dr_method != PDR_NONE)
        {
          idr = xpar_parameters->idr;
          status = reduce_d(n_tds, n_teds, &r_tds, &r_teds,
                            xpar_parameters->dr_method, idr,
                            xpar_parameters->fscrit, errc);
          if (xpar_parameters->normalize)
            {
              dataset_free(n_tds);
              dataset_free(n_teds);
            }
        }
      else
        {
          idr = d;
          r_tds = n_tds;
          r_teds = n_teds;
        }

      if (!status)
        {
          if (method == PALG_SVM)
            {
              /* Build the model, apply to the test data set. */
              problem = create_problem(r_tds);
              /* Build probability model. */
              svm_options = (struct svm_parameter *) xpar_parameters->options;
              svm_options->probability = 1;
              model = svm_train(problem, svm_options);
            }
          else if (method == PALG_MLP)
            {
              target = mlp_target(r_tds->c, r_tds->nd);
              mlp_options = (struct mlp_options *) xpar_parameters->options;
              inverse_video();
              model = mlp_learn(mlp_options->opt_method, r_tds->x, r_tds->nv,
                                r_tds->nd, idr, target, mlp_options->nlayers,
                                mlp_options->npl, mlp_options->itmax,
                                mlp_options->range, mlp_options->eta,
                                mlp_options->mu, stdout, 0, (char *) 0,
                                xpar_parameters->seed, errc,
                                xpar_parameters->fdbg);
            }

          if (model || (method == PALG_KNN))
            {
              if (!teds->prediction)
                {
                  teds->prediction = malloc(teds->nv*sizeof(int));
                  if (!teds->prediction)
                    {
                      *errc = errno;
                      status = -1;
                    }
                }

              if (!status)
                {
                  rcl_fptr = fopen(PCP_RCL, "w");
                  if (rcl_fptr)
                    {
                      /* For now, only SVM computes prediction strengths. */
                      if (method == PALG_SVM)
                        teds->ps = dmx_alloc(teds->nv, teds->c);

                      for (i = 0; (i < teds->nv) && !status; i++)
                        {
                          if (method == PALG_SVM)
                            {
                              svm_vector = create_svm_vector(r_teds->x[i],
                                                             idr);
                              /* The first call fills teds->ps[i]; the class
                                 that is recorded comes from the second
                                 call. */
                              predicted_class =
                                svm_predict_probability(model, svm_vector,
                                                        teds->ps[i])-1;
                              predicted_class =
                                svm_predict((struct svm_model *) model,
                                            svm_vector)-1;
                              free(svm_vector);
                            }
                          else if (method == PALG_MLP)
                            {
                              output = mlp_output(model, r_teds->x[i]);
                              predicted_class = fvec_argmax(output,
                                                            r_teds->c);
                            }
                          else if (method == PALG_KNN)
                            {
                              knn_options =
                                (struct knn_options *) xpar_parameters->options;
                              predicted_class = knn(r_teds->x[i], 0, r_tds,
                                                    xpar_parameters->x1,
                                                    knn_options->dist, errc,
                                                    xpar_parameters->fdbg);
                              if (predicted_class == -1)
                                status = -1;
                            }

                          if (!status)
                            {
                              teds->prediction[i] = predicted_class;
                              write_rcl(rcl_fptr, i, teds, tds);
                            }
                        }

                      if (!status)
                        {
                          status = fclose(rcl_fptr);
                          if (!status)
                            {
                              printf("\n");
                              predict_disp(teds, 0, method);
                              pwait();
                            }
                          else
                            *errc = errno;
                        }
                      else
                        /* Prediction failed: still release the stream. */
                        fclose(rcl_fptr);
                    }
                  else
                    {
                      *errc = errno;
                      status = -1;
                      if (xname)
                        *xname = strdup(PCP_RCL);
                    }
                }
            }

          /* Release the SVM model on every path, not only after a
             successful run. */
          if ((method == PALG_SVM) && model)
            svm_destroy_model((struct svm_model *) model);
        }

      if (xpar_parameters->dr_method != PDR_NONE)
        {
          dataset_free(r_tds);
          dataset_free(r_teds);
        }
      else if (xpar_parameters->normalize)
        {
          /* Without dimensionality reduction the normalized copies were
             used directly; release them here. */
          dataset_free(n_tds);
          dataset_free(n_teds);
        }
    }
}

/*
  Compute optimal
feature transformations/subsets for all experiments and all cross-validation subsets. We do it once at the beggining of the simplex/grid search, and subsequently just reuse the results. */ int compute_dr(struct xpar_crit_parameters *xpar_parameters, int dr_method, int idr, int fscrit, FILE *fdbg, int *errc) { int status; status = 0; if ((dr_method == PDR_SVD) || (dr_method == PDR_FISHER) || (dr_method == PDR_PCA) || (dr_method == PDR_EMAP)) { xpar_parameters->fext = x_fext(tds, xpar_parameters->nexp, xpar_parameters->nxval, xpar_parameters->normalize, dr_method, idr, errc); if (!xpar_parameters->fext) status = -1; } else { xpar_parameters->fsel = x_fsel(tds, xpar_parameters->nexp, xpar_parameters->nxval, xpar_parameters->normalize, dr_method, idr, fscrit, PCP_XSF, fdbg, errc); if (!xpar_parameters->fsel) status = -1; } return status; } /* Input model selection parameters common to all algorithms. */ void input_xpar(struct dataset *dset, struct xpar_crit_parameters *xpar_parameters) { int max_nxval; int ex; int dflt; int i; int dr_method; int idr; int fscrit; char *msg; max_nxval = ivec_min(dset->nd, dset->c); xpar_parameters->nxval = input_nxval(stdin, stdout, max_nxval); msg = malloc((PCP_QLEN+1)*sizeof(char)); xpar_parameters->nexp = input_nexp(msg); free(msg); xpar_parameters->seed = input_seed(stdin, stdout); srand(xpar_parameters->seed); dflt = 0; ex = 1; xpar_parameters->normalize = input_integer(stdin, stdout, PCP_UMSG_RAW, PCP_QLEN, &dflt, &dflt, &ex); ex = 0; for (i = 0; i < dset->c; i++) ex += (dset->nd[i]-1)/xpar_parameters->nxval+1; dr_method = input_dr(stdout, dset->nv-ex, dset->d, dset->c, &idr, &fscrit); xpar_parameters->dr_method = dr_method; xpar_parameters->idr = idr; xpar_parameters->fscrit = fscrit; } /* Read MLP-specific model selection parameters. 
*/
/*
  Prompts for the lower bound (`nhl'), upper bound (`nhh') and step
  (`hstep') of the number of hidden nodes. If `nitl' is not null, also
  prompts for the lower bound (`nitl'), upper bound (`nith') and step
  (`itstep') of the number of iterations; `nith' and `itstep' are written
  whenever `nitl' is given. A step is asked for only when the upper bound
  exceeds the lower bound; otherwise it is set to 1.

  Prompts are formatted into a fixed buffer of PCP_QLEN+1 characters and
  truncated if they do not fit.
*/
void input_xpar_mlp(int *nhl, int *nhh, int *hstep, int *nitl, int *nith,
                    int *itstep)
{
  int min_range;
  int dflt;
  char msg[PCP_QLEN+1];

  min_range = 1;
  dflt = PCP_MLP_DFLT_NHL;
  snprintf(msg, sizeof msg, PCP_UMSG_MLP_MSEL1, dflt);
  *nhl = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range,
                       (int *) 0);

  min_range = *nhl;
  dflt = 2*(*nhl);
  snprintf(msg, sizeof msg, PCP_UMSG_MLP_MSEL2, min_range, dflt);
  *nhh = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range,
                       (int *) 0);

  if (*nhh > *nhl)
    {
      min_range = 1;
      dflt = (*nhh-*nhl)/PCP_MLP_DFLT_HSTP;
      if (dflt == 0)
        dflt = 1;
      snprintf(msg, sizeof msg, PCP_UMSG_MLP_MSEL3, min_range, dflt);
      *hstep = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range,
                             (int *) 0);
    }
  else
    *hstep = 1;

  if (nitl)
    {
      min_range = 1;
      dflt = PCP_MLP_DFLT_MIN;
      snprintf(msg, sizeof msg, PCP_UMSG_MLP_MSEL4, min_range, dflt);
      *nitl = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range,
                            (int *) 0);

      min_range = *nitl;
      dflt = 2*min_range;
      snprintf(msg, sizeof msg, PCP_UMSG_MLP_MSEL5, min_range, dflt);
      *nith = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range,
                            (int *) 0);

      if (*nith > *nitl)
        {
          min_range = 1;
          dflt = (*nith-*nitl)/PCP_MLP_DFLT_ISTP;
          if (dflt == 0)
            dflt = 1;
          /* The iteration step uses the same prompt text as the hidden
             node step. */
          snprintf(msg, sizeof msg, PCP_UMSG_MLP_MSEL3, min_range, dflt);
          *itstep = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt,
                                  &min_range, (int *) 0);
        }
      else
        *itstep = 1;
    }
}

/*
  Read k-NN-specific model selection parameters.
*/
/*
  Prompts for the smallest (`kmin'), largest (`kmax') and step (`kstep')
  value of k. Both `kmin' and `kmax' are limited from above by the minimum
  over all classes of nd[i]-nd[i]/nxval; `kmax' is also limited from below
  by `kmin'.

  Prompts are formatted into a fixed buffer of PCP_QLEN+1 characters and
  truncated if they do not fit.
*/
void input_xpar_knn(struct dataset *dset, int nxval, int *kmin, int *kmax,
                    int *kstep)
{
  int i;
  int min_range;
  int max_range;
  int mx;
  int kval;
  int dflt;
  char msg[PCP_QLEN+1];

  /* Largest admissible k: the minimum over the classes. */
  mx = 0;
  for (i = 0; i < dset->c; i++)
    {
      kval = dset->nd[i]-dset->nd[i]/nxval;
      if ((i == 0) || (kval < mx))
        mx = kval;
    }

  min_range = 1;
  max_range = mx;
  dflt = PCP_KNN_DFLT_KMIN;
  snprintf(msg, sizeof msg, PCP_UMSG_KNN_MSEL1, max_range, dflt);
  *kmin = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range,
                        &max_range);

  min_range = *kmin;
  max_range = mx;
  dflt = PCP_KNN_DFLT_KMAX;
  if (dflt > max_range)
    dflt = max_range;
  snprintf(msg, sizeof msg, PCP_UMSG_KNN_MSEL2, min_range, max_range, dflt);
  *kmax = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range,
                        &max_range);

  min_range = 1;
  dflt = PCP_KNN_DFLT_KSTEP;
  snprintf(msg, sizeof msg, PCP_UMSG_KNN_MSEL3, dflt);
  *kstep = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &min_range,
                         (int *) 0);
}