/*
  Module name: pau.c
  Created by: Ljubomir Buturovic
  Created: 02/18/2002
  Purpose: assorted utilities for PCP.
*/  

/*
  Copyright 2004 Ljubomir J. Buturovic

  Permission is hereby granted, free of charge, to any person
  obtaining a copy of this software and associated documentation files
  (the "Software"), to deal in the Software without restriction,
  including without limitation the rights to use, copy, modify, merge,
  publish, distribute, sublicense, and/or sell copies of the Software,
  and to permit persons to whom the Software is furnished to do so,
  subject to the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
*/

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <math.h>
#include <float.h>
#include <sys/stat.h>
#include <string.h>
#include <errno.h>
#include <stdarg.h>
#include <time.h>
#include "lerr.h"
#include "lau.h"
#include "lmat.h"
#include "lin.h"
#include "pau.h"
#include "ddset.h"
#include "mlp.h"
#include "pcp.h"
#include "hash.h"
#include "hash_util.h"

/*
  Log matrix.
*/
void log_mx(float **mx, int rows, int columns, FILE *fdbg)
{
  int i;
  int j;

  if (fdbg != (FILE *) 0)
    for (i = 0; i < rows; i++)
      {
	fprintf(fdbg, "row %5d: ", i);
	for (j = 0; j < columns; j++)
	  fprintf(fdbg, "%12.5g ", mx[i][j]);
	fprintf(fdbg, "\n");
      }
}

/*
  Log columns of 'mx'.
*/
void log_mxt(float **mx, int rows, int columns, FILE *fdbg)
{
  int i;
  int j;

  if (fdbg != (FILE *) 0)
    for (i = 0; i < columns; i++)
      {
	fprintf(fdbg, "column %5d: ", i);
	for (j = 0; j < rows; j++)
	  fprintf(fdbg, "%12.5g ", mx[j][i]);
	fprintf(fdbg, "\n");
      }
}

/*
  Calculate and display covariance matrix and matrix of linear
  correlation coefficients for data in 'xmx'.
*/
void covar(float **xmx, int d, int nv, int *errc, int dbg)
{
  int   i;
  int   status;
  int   max_row;
  int   max_column;
  float fmax;
  float **sigma;
  FILE  *outdev;
  FILE  *fdbg = (FILE *) 0;

  outdev = fopen("/dev/stdout", "w");
  status = 0;
  if (dbg > 0)
    fdbg = fopen(PCP_DBG, "a");
  sigma = cest(xmx, d, nv, COVARIANCE_MATRIX);
  inverse_video();
  printf("Covariance matrix:\n");
  log_mx(sigma, d, d, stdout);
  fmx_max(sigma, d, d, &max_row, &max_column, &fmax);
  printf("Maximal element (%5d)(%5d): %12.5g\n", max_row+1, 
	 max_column+1, fmax);
  correst(sigma, d);
  printf("\nMatrix of linear correlations:\n");
  log_mx(sigma, d, d, stdout);
  if (d > 1)
    {
      /*
	To find max. element in correlation matrix, first set diagonal
	to large negative values (because by definition, the diagonal
	elements are already 1, and therefore max.)
      */
      for (i = 0; i < d; i++)
	sigma[i][i] = -FLT_MAX;
      fmx_max(sigma, d, d, &max_row, &max_column, &fmax);
      printf("Maximal off-diagonal element (%5d)(%5d): %12.5g\n", max_row+1, 
	     max_column+1, fmax);
    }
  reset_video();
  mx_free((void **) sigma, d);
}

/*
  Terminate 'string' after the first token.
*/
static void token_string(char *string)
{
  char *str;

  str = string+strspn(string, WHITESPACE);
  str = str+strcspn(string, WHITESPACE);
  *str = '\0';
}

/*
  Print question 'msg' on 'outdev', read integer response from
  'indev'.  The function loops until the answer is in ['min_range',
  'max_range']. 'length' is total length of question line.

  If 'dflt' is not NULL, the function returns that value when the user
  enters empty line.

  In case of error return EOF. The errors are bad arguments ('msg',
  'indev' or 'outdev' NULL) or malloc() error. In the latter case set
  'errno'.
*/
int input_integer(FILE *indev, FILE *outdev, char *msg, int length, 
		  int *dflt, int *min_range, int *max_range)
{
  int  idx;
  int  done = 0;
  char *string;
  char *emsg;

  errno = 0;
  idx = EOF;
  if (msg && indev && outdev)
    {
      emsg = malloc((length+1)*sizeof(char));
      string = malloc((P_MAX_LINE_LEN+2)*sizeof(char));
      if (emsg && string)
	while (!done)
	  {
	    print_line(outdev, msg, length);
	    fgets(string, P_MAX_LINE_LEN+1, indev);
	    token_string(string);
	    idx = atoi(string);
	    if (!idx && dflt)
	      {
		idx = *dflt;
		done = 1;
	      }
	    else if (!min_range && !max_range)
	      done = 1;
	    else if (min_range && !max_range)
	      {
		if (idx >= *min_range)
		  done = 1;
		else
		  {
		    sprintf(emsg, "The value must be greater than or equal to %5d.", *min_range);
		    print_line(outdev, emsg, length);
		    fprintf(outdev, "\n");
		  }
	      }
	    else if (!min_range && max_range)
	      {
		if (idx <= *max_range)
		  done = 1;
		else
		  {
		    sprintf(emsg, "The value must be less or equal than %5d.", *max_range);
		    print_line(outdev, emsg, length);
		    fprintf(outdev, "\n");
		  }
	      }
	    else if (min_range && max_range)
	      {
		if ((idx >= *min_range) && (idx <= *max_range))
		  done = 1;
		else
		  {
		    sprintf(emsg, "The value must be between %5d and %5d.", 
			    *min_range, *max_range);
		    print_line(outdev, emsg, length);
		    fprintf(outdev, "\n");
		  }
	      }
	  }
      free(emsg);
      free(string);
    }
  return idx;
}

/*
  Print question 'msg' on 'outdev', read integer response from
  'indev'. The function repeats the question until the response is one
  of the values in vector 'choices' of length 'len'. 'length' is total
  length of question line.

  If 'dflt' is not NULL, the function returns that value when the user
  enters empty line.

  In case of error return EOF. The errors are bad arguments ('msg',
  'indev' or 'outdev' NULL) or malloc() error. In the latter case set
  'errno'.
*/
int input_choice(FILE *indev, FILE *outdev, char *msg, int *dflt, int *choices, int len)
{
  int  i;
  int  idx;
  int  done;
  char *string;

  errno = 0;
  idx = EOF;
  done = 0;
  if (msg && indev && outdev && choices && (len > 0))
    {
      string = malloc((P_MAX_LINE_LEN+2)*sizeof(char));
      if (string)
	while (done == 0)
	  {
	    viprint_line(2, 1, msg);
	    fgets(string, P_MAX_LINE_LEN+1, indev);
	    token_string(string);
	    idx = atoi(string);
	    if ((idx == 0) && dflt)
	      {
		idx = *dflt;
		done = 1;
	      }
	    else 
	      {
		for (i = 0; (i < len) && !done; i++)
		  if (idx == choices[i])
		    done = 1;
	      }
	    if (!done)
	      {
		viprint_line(2, 1, PCP_UMSG_CHOICE);
		printf("\n");
	      }
	  }
      free(string);
    }
  return idx;
  
}

static int decode_format(int if1, int if2)
{
  int format;

  format = DATASET_FF_RAW;
  if ((if1 == 0) && (if2 == 0))
    format = DATASET_FF_RAW;
  else if ((if1 == 1) && (if2 == 0))
    format = DATASET_FF_COL;
  else if ((if1 == 0) && (if2 == 1))
    format = DATASET_FF_VEC;
  else if ((if1 == 1) && (if2 == 1))
    format = DATASET_FF_COLVEC;
  return format;
}

/*
  Read dataset file format from 'indev'.
*/
int input_format(FILE *indev, FILE *outdev)
{
  int  dflt;
  int  min_range;
  int  max_range;
  int  if1;
  int  if2;
  int  format;

  dflt = 0;
  min_range = 0;
  max_range = 1;
  if1 = input_integer(indev, outdev, PCP_UMSG_HFORMAT, PCP_QLEN, &dflt, &min_range, &max_range);
  if2 = input_integer(indev, outdev, PCP_UMSG_RFORMAT, PCP_QLEN, &dflt, &min_range, &max_range);
  format = decode_format(if1, if2);
  return format;
}

/*
  Float version of input_integer(). The input value must be in
  [min_range, max_range].
*/
float input_float(FILE *indev, FILE *outdev, char *msg, int length, 
		  float *dflt, float *min_range, float *max_range)
{
  float fdx;
  int   done = 0;
  int   len;
  char  *string;
  char  *emsg;

  errno = 0;
  fdx = 0.0;
  if (msg && indev && outdev)
    {
      emsg = malloc((length+1)*sizeof(char));
      string = malloc((P_MAX_LINE_LEN+2)*sizeof(char));
      if (emsg && string)
	while (done == 0)
	  {
	    print_line(outdev, msg, length);
	    fgets(string, P_MAX_LINE_LEN+1, indev);
	    token_string(string);
	    len = strspn(string, DIGITS);
	    string[len] = '\0';
	    fdx = (float) atof(string);
	    if ((fdx == 0) && (dflt != (float *) 0))
	      {
		fdx = *dflt;
		done = 1;
	      }
	    else if (!min_range && !max_range)
	      done = 1;
	    else if (min_range && !max_range)
	      {
		if (fdx >= *min_range)
		  done = 1;
		else
		  {
		    sprintf(emsg, "The value must be greater than or equal to %5f.", *min_range);
		    print_line(outdev, emsg, length);
		    fprintf(outdev, "\n");
		  }
	      }
	    else if (!min_range && !max_range)
	      {
		if (fdx <= *max_range)
		  done = 1;
		else
		  {
		    sprintf(emsg, "The value must be less or equal %5f.", *max_range);
		    print_line(outdev, emsg, length);
		    fprintf(outdev, "\n");
		  }
	      }
	    else if (min_range && max_range)
	      {
		if ((fdx >= *min_range) && (fdx <= *max_range))
		  done = 1;
		else
		  {
		    sprintf(emsg, "The value must be between %5f and %5f.", 
			    *min_range, *max_range);
		    print_line(outdev, emsg, length);
		    fprintf(outdev, "\n");
		  }
	      }
	  }
      free(emsg);
      free(string);
    }
  return fdx;
}

/*
  Print question 'msg' on 'outdev', read response from 'indev'. Total
  length of question line is 'length'. The function returns the string
  response. The max. name of the response is limited to the maximum
  length of the file name in the current filesystem, as determined by
  statfs().

  It is the caller's responsibility to free() the returned string.

  If user enters empty string, returns empty string. In case of
  illegal arguments, returns (char *) 0 and sets errno to 0. In case
  of error, returns (char *) 0 and sets errno.
*/
static char *input_fname(FILE *indev, FILE *outdev, char *msg, int length)
{
  int  maxlen;
  char *string;
  char *xtr;

  string = (char *) 0;
  errno = 0;
  if (msg && indev && outdev && (length > 0))
    {
      maxlen = get_namelen();
      if (maxlen != -1)
	{
	  string = malloc((maxlen+1)*sizeof(char));
	  if (string)
	    {
	      print_line(outdev, msg, length);
	      fgets(string, maxlen+1, indev);
	      xtr = strrchr(string, '\n');
	      if (xtr)
		*xtr = '\0';
	      /*
		Remove trailing CR, if present. This may happen when
		reading filenames on Windows.
	      */
	      xtr = strrchr(string, '\r');
	      if (xtr)
		*xtr = '\0';
	    }
	}
    }
  return string;
}

/*
  Get filename from stdin. 'dflt' is the default filename. The query
  message is 'txt[dflt]:' or 'txt', however at most PCP_QLEN of the
  message is printed.
*/
char *input_filename(char *txt, char *dflt, FILE *outdev)
{
  char *message;
  char *fname;

  message = malloc((PCP_QLEN+1)*sizeof(char));
  if (dflt)
    snprintf(message, PCP_QLEN+1, "%s[%s]:", txt, dflt);
  else
    snprintf(message, PCP_QLEN+1, "%s", txt);
  fname = input_fname(stdin, outdev, message, PCP_QLEN);
  free(message);
  if ((!fname || !strlen(fname)) && dflt)
    fname = strdup(dflt);
  return fname;
}

/*
  Read input data: number of features, number of classes, number of
  vectors per class, and input files.

  Return the populated 'dataset' structure. In case of error, return
  (struct dataset *) 0 and set 'errc' to error code. Error codes are
  PERR_BAD_INPUT_FILE, LERR_FILE_FORMAT, or <errno.h> error codes. In
  case of file-related errors, 'fname' has name of file which caused
  the error.
*/
struct dataset *pcp_input(FILE *indev, FILE *outdev, int *errc, char **xname)
{
  int  i;
  int  status;
  int  line_count;
  int  min_range;
  int  errno_save;
  int  ntok;
  int  fmt;
  int  c;
  int  d;
  char *msg = (char *) 0;
  int  *nd;
  char **fnames;
  struct dataset *dset;

  *errc = 0;
  d = 0;
  status = 0;
  dset = (struct dataset *) 0;
  fmt = input_format(indev, outdev);
  min_range = 1;
  c = input_integer(indev, outdev, PCP_UMSG_NCLASSES, PCP_QLEN, 
		    (int *) 0, &min_range, (int *) 0);
  if (c != EOF)
    {
      nd = malloc(c*sizeof(int));
      if (nd)
	{
	  msg = malloc((PCP_QLEN+1)*sizeof(char));
	  if (msg)
	    {
	      fnames = calloc(c, sizeof(char *));
	      if (fnames)
		{
		  for (i = 0; (i < c) && (status == 0); i++)
		    {
		      /* 
			 Read the file names and check the format.
		      */
		      sprintf(msg, "%s %5d:", PCP_UMSG_CLASS_FNAME, i+1);
		      fnames[i] = input_fname(indev, outdev, msg, PCP_QLEN);
		      if (fnames[i] == (char *) 0)
			status = -1;
		      else
			{
			  line_count = file_info(fnames[i], (int *) 0, &ntok, '\0');
			  if (i == 0)
			    {
			      if ((fmt == DATASET_FF_RAW) || (fmt == DATASET_FF_COL))
				d = ntok;
			      else if ((fmt == DATASET_FF_VEC) || (fmt == DATASET_FF_COLVEC))
				d = ntok-1;
			    }
			  if ((line_count != -1) && (d > 0))
			    {
			      if ((fmt == DATASET_FF_RAW) || (fmt == DATASET_FF_VEC))
				nd[i] = line_count;
			      else if ((fmt == DATASET_FF_COL) || (fmt == DATASET_FF_COLVEC))
				nd[i] = line_count-1;
			    }
			  else
			    {
			      status = -1;
			      *xname = strdup(fnames[i]);
			      if (d == -1)
				*errc = PERR_BAD_INPUT_FILE;
			      if (ntok == -1)
				*errc = LERR_VAR_NCOL;
			      mx_free((void **) fnames, c);
			    }
			}
		    }
		  if (!status)
		    {
		      dset = load_dataset(d, c, nd, fnames, fmt, errc, xname);
		      if (!dset)
			status = -1;
		    }
		}
	    }
	  else
	    status = -1;
	}
      else
	status = -1;
    }
  else
    status = -1;
  if (status == -1) 
    {
      errno_save = errno;
      dset = dataset_free(dset);
      if (!*errc)
	*errc = errno_save;
    }
  free(msg);
  return dset;
}

/*
  Print 'msg' to 'outdev', centered within a string of 'length'
  characters. If 'msg' is longer than 'length', it is truncated.
*/
void center_line(FILE *outdev, char *msg, int length)
{
  int  len;
  int  l1;
  int  l2;
  char *prefix;
  char *suffix;
  char *str;

  len = strlen(msg);
  if (len >= length)
    {
      str = strdup(msg);
      str[length] = '\0';
      printf("%s", str);
    }
  else
    {
      l1 = (length-len)/2;
      l2 = length-len-l1;
      prefix = malloc(l1+1);
      suffix = malloc(l2+1);
      memset(prefix, ' ', l1);
      prefix[l1] = '\0';
      memset(suffix, ' ', l2);
      suffix[l2] = '\0';
      fprintf(outdev, "%s%s%s", prefix, msg, suffix);
    }
}

/*
  Print 'msg' to 'outdev'. Fill remainder up to 'length' with space
  characters.
*/
void print_line(FILE *outdev, char *msg, int length)
{
  int  len;
  char blank[PCP_QLEN+1];

  if (outdev)
    {
      inverse_video();
      if (msg && *msg)
	len = length-strlen(msg);
      else
	len = length;
      if (len > 0)
	{
	  memset(blank, ' ', len);
	  blank[len] = '\0';
	  fprintf(outdev, "%s%s\n", msg, blank);
	}
      else
	{
	  msg[length] = '\0';
	  fprintf(outdev, "%s\n", msg);
	}
      reset_video();
    }
}

/*
  Similar to print_line(), but doesn't set video, and doesn't print
  newline.
*/
void print_ln(FILE *outdev, char *msg, int length)
{
  int  len;
  char *ptr;
  char blank[PCP_QLEN+1];

  if (msg && *msg)
    {
      if ((ptr = strrchr(msg, '\n')))
	*ptr = '\0';
      len = length-strlen(msg);
    }
  else
    len = length;
  memset(blank, ' ', len);
  blank[len] = '\0';
  fprintf(outdev, "%s%s", msg, blank);
}

static void vpr_line(va_list VariableArgsPtr)
{
  int  i;
  int  len;
  int  msglen;
  int  nlines;
  int  nrem;
  char *format;
  char *msg;
  char *blank;

  msg = malloc((PCP_LONG_LEN+1)*sizeof(char));
  format = va_arg(VariableArgsPtr, char *);
  vsnprintf(msg, PCP_LONG_LEN+1, format, VariableArgsPtr);
  msglen = strlen(msg);
  nlines = msglen/PCP_QLEN; /* number of complete lines */
  nrem = msglen-nlines*PCP_QLEN; /* number of characters in the incomplete line */
  if (nrem > 0)
    {
      len = PCP_QLEN-nrem; /* number of blank characters to fill the incomplete line */
      blank = malloc((len+1)*sizeof(char));
      memset(blank, ' ', len);
      blank[len] = '\0';
      msglen += len;
    }
  else
    blank = strdup("");
  strcat(msg, blank);
  for (i = 0; i < msglen; i++)
    {
      if ((i > 0) && ((i%PCP_QLEN) == 0))
	putchar('\n');
      putchar(msg[i]);
    }
  putchar('\n');
  va_end(VariableArgsPtr);
  free(msg);
  free(blank);
}

/*
  Print fixed-length message, with variable number of
  arguments. 'nargs' is the number of arguments following 'nargs'. The
  arguments are the same as for printf() and friends.

  TBD: this function doesn't seem to work... and is unused
  anyway. Remove? ljb, 11/26/2005.
*/
void vprint_line(int nargs, ...)
{
  va_list VariableArgsPtr;

  va_start(VariableArgsPtr, nargs);
  vpr_line(VariableArgsPtr);
  va_end(VariableArgsPtr);
}

/*
  Like vprint_line(), with inverse video. First argument after `nargs'
  is an integer. If it equals one, display message in inverse video,
  otherwise display in normal video.
*/
void viprint_line(int nargs, ...)
{
  int  video;
  va_list VariableArgsPtr;

  va_start(VariableArgsPtr, nargs);
  video = va_arg(VariableArgsPtr, int);
  if (video)
    inverse_video();
  vpr_line(VariableArgsPtr);
  va_end(VariableArgsPtr);
  if (video)
    reset_video();
}

/*
  Return pseudo-random number in [0, 1].
*/
float float_rand(void)
{
  float frand;

  frand = rand()/(RAND_MAX+1.0);
  return frand;
}

/*
  Display control functions. 
*/

#define STR_RESET  "[0m"

void cursor_on(void)
{
  char escape;

  escape = ESCAPE_CHAR;
  printf("%c[?25h", escape);
  fflush(stdout);
}

void cursor_off(void)
{
  char escape;

  escape = ESCAPE_CHAR;
  printf("%c[?25l", escape);
  fflush(stdout);
}

void reset_video(void)
{
  int  fd;
  int  count;
  char escape;
  char buf[10];

  fd = fileno(stdout);
  escape = ESCAPE_CHAR;
  buf[0] = escape;
  strcpy(&buf[1], STR_RESET);
  count = strlen(STR_RESET);
  write(fd, buf, count+1);
}

void clear_screen(void)
{
  char escape;

  escape = ESCAPE_CHAR;
  printf("%c[2J", escape);
  cpos(0, 0);
}

/*
  Position cursor at (x, y). x is column, y is row. (0, 0) is upper
  left corner.
*/
void cpos(int x, int y)
{
  char escape;
  char xdigit1;
  char xdigit2;
  char ydigit1;
  char ydigit2;
  
  escape = ESCAPE_CHAR;
  xdigit1 = '0'+(x+1)/10;
  xdigit2 = '0'+(x+1)%10;
  ydigit1 = '0'+(y+1)/10;
  ydigit2 = '0'+(y+1)%10;
  printf("%c[%c%c;%c%cH", escape, ydigit1, ydigit2, xdigit1, xdigit2);
}

void inverse_video(void)
{
  char escape;
  char color[3];

  escape = ESCAPE_CHAR;
  memcpy(color, PCP_VT100_COLOR, strlen(PCP_VT100_COLOR)); 
  color[2] = '\0';
  printf("%c[0;7;1;%sm", escape, color);
}

unsigned int input_seed(FILE *indev, FILE *outdev)
{
  int  min_range;
  unsigned int seed;
  char *msg;

  min_range = 1;
  msg = malloc((PCP_QLEN+1)*sizeof(char));
  sprintf(msg, PCP_UMSG_SEED, min_range);
  seed = input_integer(stdin, outdev, msg, PCP_QLEN, &min_range, &min_range, (int *) 0);
  free(msg);
  return seed;
}

int input_nmodels(FILE *indev, FILE *outdev)
{
  int  min_range;
  int  int_default;
  int  nmodels;
  char *msg;

  min_range = 1;
  msg = malloc((PCP_QLEN+1)*sizeof(char));
  int_default = 10;
  sprintf(msg, PCP_UMSG_NCOMB, int_default);
  nmodels = input_integer(stdin, outdev, msg, PCP_QLEN, &int_default, &min_range, (int *) 0);
  free(msg);
  return nmodels;
}

/*
  Enter dimension of transformed space for linear dimension
  reduction. The arguments have obvious meanings.
*/
int input_d(FILE *indev, FILE *outdev, int max_value, int default_value)
{
  int  min_range;
  int  max_range;
  int  int_default;
  int  d;
  char *msg;

  min_range = 1;
  max_range = max_value;
  int_default = default_value;
  msg = malloc((PCP_QLEN+1)*sizeof(char));
  sprintf(msg, PCP_UMSG_NDR, max_range, int_default);
  d = input_integer(stdin, outdev, msg, PCP_QLEN, &int_default, &min_range, &max_range);
  free(msg);
  return d;
}

/*
  Enter number of features to select for feature subset selection.
*/
int input_nfeat(FILE *indev, FILE *outdev, int max_value, int default_value)
{
  int  min_range;
  int  max_range;
  int  int_default;
  int  d;
  char *msg;

  min_range = 1;
  max_range = max_value;
  int_default = default_value;
  msg = malloc((PCP_LONG_LEN+1)*sizeof(char));
  sprintf(msg, PCP_UMSG_NFEAT, max_range, int_default);
  d = input_integer(stdin, outdev, msg, PCP_QLEN, &int_default, &min_range, &max_range);
  free(msg);
  return d;
}

int input_nexp(char *msg)
{
  int dflt;
  int idx;
  int nexp;

  dflt = PCP_DFLT_NEXP; 
  snprintf(msg, PCP_QLEN+1, PCP_UMSG_NEXP, dflt);
  idx = 1;
  nexp = input_integer(stdin, stdout, msg, PCP_QLEN, &dflt, &idx, (int *) 0);
  return nexp;
}

int input_nxval(FILE *indev, FILE *outdev, int max_nxval)
{
  int  min_range;
  int  int_default;
  int  max_range;
  int  nxval;
  char *msg;

  min_range = 2;
  msg = malloc((PCP_QLEN+1)*sizeof(char));
  if (max_nxval >= 10)
    int_default = 10;
  else
    int_default = max_nxval;
  max_range = max_nxval;
  sprintf(msg, PCP_UMSG_NXVAL, min_range, max_range, int_default);
  nxval = input_integer(stdin, outdev, msg, PCP_QLEN, &int_default, 
			&min_range, &max_range);
  free(msg);
  return nxval;
}

/*
  Read MLP parameters from stdin.
*/
int input_mlp(FILE *outdev, int inputs, int outputs, int nvec, int *mlp_continue, 
	      int *nlayers, int **npl, int *itmax, float *range, int *opt_method, 
	      float *eta, float *mu, int *nxval, int max_nxval, int *nmlp,
	      unsigned int *seed, char **fname)
{
  int   i;
  int   status = 0;
  int   ixd = 0;
  int   min_range;
  int   int_default;
  int   max_range;
  int   new_mlp_flag;
  int   nlay;
  float minf_range;
  float maxf_range;
  float float_default;
  int   *xpl;
  char  *msg;
  FILE  *fptr;

  fptr = (FILE *) 0;
  new_mlp_flag = 0;
  clear_screen();
  cursor_on();
  msg = malloc((PCP_QLEN+1)*sizeof(char));
  if (mlp_continue)
    {
      min_range = 0;
      max_range = 1;
      ixd = input_integer(stdin, outdev, "Begin learning (0), or continue (1) [0]:",
			  PCP_QLEN, &min_range, &min_range, &max_range);
      *mlp_continue = ixd;
    }
  if (!mlp_continue || (*mlp_continue == 0))
    new_mlp_flag = 1;
  if (fname)
    {
      if (nxval)
	*fname = input_filename(PCP_UMSG_XMP_FNAME, PCP_XMP, outdev);
      else
	*fname = input_filename(PCP_UMSG_FNAME_MLP, PCP_MLP, outdev);
      if (new_mlp_flag == 1)
	fptr = fopen(*fname, "w");
      else
	fptr = fopen(*fname, "r");
    }
  if (fptr || !fname)
    {
      if (fname)
	fclose(fptr);
      if (new_mlp_flag == 1)
	{
	  if (nlayers)
	    {
	      min_range = 1;
	      ixd = input_integer(stdin, outdev, PCP_UMSG_NHIDDEN, PCP_QLEN, &min_range, &min_range, (int *) 0);
	    }
	  else
	    ixd = 1;
	}
      if (ixd != EOF)
	{
	  if (new_mlp_flag == 1)
	    {
	      nlay = ixd+1;
	      if (nlayers)
		{
		  *nlayers = nlay;
		  sprintf(msg, "Number of inputs is %5d.", inputs);
		  print_line(outdev, msg, PCP_QLEN);
		  sprintf(msg, "Number of output nodes is %5d.", outputs);
		  print_line(outdev, msg, PCP_QLEN);
		  printf("\n");
		  xpl = malloc(nlay*sizeof(int));
		  for (i = 0; i < nlay-1; i++)
		    {
		      min_range = 1;
		      sprintf(msg, PCP_UMSG_NNODES, i+1);
		      ixd = input_integer(stdin, outdev, msg, PCP_QLEN, (int *) 0, &min_range, (int *) 0);
		      xpl[i] = ixd;
		    }
		  xpl[nlay-1] = outputs;
		  *npl = xpl;
		}
	    }
	  if (new_mlp_flag == 1)
	    {
	      if (seed)
		*seed = input_seed(stdin, outdev);
	      minf_range = 0.01;
	      float_default = 1.0;
	      sprintf(msg, PCP_UMSG_INIT_WEIGHTS, float_default);
	      *range = input_float(stdin, outdev, msg, PCP_QLEN, &float_default, 
				   &minf_range, (float *) 0);
	    }

	  min_range = 1;
	  max_range = 2;
	  int_default = MLP_OPT_CGPLUS;
	  sprintf(msg, PCP_UMSG_OPT_METHOD, MLP_OPT_CGPLUS, MLP_OPT_GRADIENT_DESCENT, int_default);
	  *opt_method = input_integer(stdin, outdev, msg, PCP_QLEN, &int_default,
				      &min_range, (int *) 0);
	  if (*opt_method == MLP_OPT_GRADIENT_DESCENT)
	    {
	      minf_range = FLT_MIN;
	      float_default = 0.01;
	      sprintf(msg, PCP_UMSG_LRATE, float_default);
	      *eta = input_float(stdin, outdev, msg, PCP_QLEN, &float_default, 
				 &minf_range, (float *) 0);
	      minf_range = 0.0;
	      maxf_range = 1.0-FLT_MIN;
	      float_default = 0.0;
	      sprintf(msg, PCP_UMSG_MOMENTUM, float_default);
	      *mu = input_float(stdin, outdev, msg, PCP_QLEN, &float_default, 
				&minf_range, &maxf_range);
	    }

	  if (itmax)
	    {
	      min_range = -1;
	      int_default = 100;
	      sprintf(msg, PCP_UMSG_NMAXIT, int_default);
	      *itmax = input_integer(stdin, outdev, msg, PCP_QLEN, &int_default, &min_range,
				     (int *) 0);
	    }
	  if (new_mlp_flag == 1)
	    if (nxval)
	      *nxval = input_nxval(stdin, outdev, max_nxval);
	  if (nmlp)
	    {
	      int_default = 10;
	      min_range = 1;
	      sprintf(msg, PCP_UMSG_NMLP, int_default);
	      *nmlp = input_integer(stdin, outdev, msg, PCP_QLEN, &int_default, &min_range,
				    (int *) 0);
	    }
	}
      else
	status = -1;
    }
  else
    status = -1;
  free(msg);
  return status;
}

/*
  Input k-NN distance code.
*/
int input_knn_dist(FILE *outdev)
{
  int   dist;
  int   *dch;
  int   int_default;
  char  *msg;

  msg = malloc((PCP_QLEN+1)*sizeof(char));
  int_default = EUCLIDEAN_DIST;
  dch = malloc(PCP_N_DIST*sizeof(int));
  dch[0] = EUCLIDEAN_DIST;
  dch[1] = CITY_BLOCK_DIST;
  dch[2] = MAHALANOBIS_DIST;
  snprintf(msg, PCP_QLEN+1, PCP_UMSG_KDIST, dch[0], dch[1], dch[2], int_default);
  dist = input_choice(stdin, outdev, msg, &int_default, dch, PCP_N_DIST);
  free(dch);
  free(msg);
  return dist;
}

/*
  Get ensemble variant of the original method (bagging or boosting).
*/
int input_ensemble_method(int method)
{
  int   mtd;
  int   dtd;
  int   dch[2];
  int   int_default;
  char  *msg;

  msg = malloc((PCP_QLEN+1)*sizeof(char));
  dch[0] = 0;
  dch[1] = 1;
  int_default = 0;
  snprintf(msg, PCP_QLEN+1, PCP_UMSG_ENSEMBLE, dch[0], dch[1], int_default);
  dtd = input_choice(stdin, stdout, msg, &int_default, dch, PCP_N_DIST);
  if (method == PALG_SVM)
    {
      if (dtd == 0)
	mtd = PALG_BAG_SVM;
      else 
	mtd = PALG_ADABOOST_SVM;
    }
  free(msg);
  return mtd;
}

/*
  Get transformation (SVD, scaling) mode.

  0: use training and test sets to compute the transformation
  1: use only the training set
*/
int input_transform_mode()
{
  int   mode;
  int   dch[2];
  int   int_default;
  char  *msg;

  msg = malloc((PCP_QLEN+1)*sizeof(char));
  dch[0] = 0;
  dch[1] = 1;
  int_default = 0;
  snprintf(msg, PCP_QLEN+1, PCP_UMSG_SVD_MODE, dch[1], dch[0], int_default);
  mode = input_choice(stdin, stdout, msg, &int_default, dch, 2);
  free(msg);
  return mode;
}

/*
  Read k-NN parameters from stdin.

  In case of failure, return -1 and set errno. Possible errors are
  fopen() errors.
*/
int input_knn(FILE *outdev, int *nxval, int max_nxval, int *nmlp, int *k, int maxk,
	      unsigned int *seed, char **fname, int *dist)
{
  int   status = 0;
  int   min_range;
  int   max_range;
  int   int_default;
  int   new_mlp_flag;
  char  *msg;
  FILE  *fptr;

  new_mlp_flag = 0;
  clear_screen();
  cursor_on();
  msg = malloc((PCP_QLEN+1)*sizeof(char));
  int_default = 1;
  min_range = 1;
  if (maxk > 0)
    {
      sprintf(msg, PCP_UMSG_NN, maxk, int_default);
      max_range = maxk;
      *k = input_integer(stdin, outdev, msg, PCP_QLEN, &int_default, &min_range, 
			 &max_range);
    }
  else
    {
      sprintf(msg, PCP_UMSG_NN1, int_default);
      *k = input_integer(stdin, outdev, msg, PCP_QLEN, &int_default, &min_range,
			 (int *) 0);
    }
  *dist = input_knn_dist(outdev);
  if (fname)
    {
      if (nxval)
	*fname = input_filename(PCP_UMSG_XNN, PCP_XNN, outdev);
      else
	*fname = input_filename(PCP_UMSG_ONAME, PCP_KNN, outdev); /* currently unused */
      /*
	Check if we can write to the file.
      */
      fptr = fopen(*fname, "w");
      if (fptr)
	fclose(fptr);
      else
	status = -1;
    }
  if (!status)
    {
      if (nxval)
	*nxval = input_nxval(stdin, outdev, max_nxval);
      if (nmlp)
	{
	  int_default = 10;
	  min_range = 1;
	  sprintf(msg, PCP_UMSG_KSUB, int_default);
	  *nmlp = input_integer(stdin, outdev, msg, PCP_QLEN, &int_default, &min_range,
				(int *) 0);
	}
      if (seed)
	*seed = input_seed(stdin, outdev);
    }
  vx_free(msg);
  return status;
}

/*
  Read feature selection criterion. Used to evaluate feature subsets.

  Note that PCP_FSEL_GOLUB is only defined for two classes.
*/
int input_fscrit(FILE *outdev, int dr_method, int c)
{
  int  fscrit;
  int  int_default;
  int  len;
  int  *fch;
  char *msg;

  msg = malloc((PCP_LONG_LEN+1)*sizeof(char));
  len = 0;
  fch = (int *) 0;
  if (dr_method == PDR_RANKING)
    {
      int_default = PCP_FSEL_PEARSON;
      fch = malloc(PCP_N_FSCRIT*sizeof(int));
      if (c == 2)
	{
	  fch[len++] = PCP_FSEL_EUCLIDEAN;
	  fch[len++] = PCP_FSEL_PEARSON;
	  fch[len++] = PCP_FSEL_GOLUB;
	  fch[len++] = PCP_FSEL_KNN;
	  fch[len++] = PCP_FSEL_BAYES;
	  sprintf(msg, PCP_UMSG_FSEL_1, fch[0], fch[1], fch[2], fch[3], fch[4], int_default);
	}
      else
	{
	  fch[len++] = PCP_FSEL_EUCLIDEAN;
	  fch[len++] = PCP_FSEL_PEARSON;
	  fch[len++] = PCP_FSEL_KNN;
	  fch[len++] = PCP_FSEL_BAYES;
	  sprintf(msg, PCP_UMSG_FSEL_2, fch[0], fch[1], fch[2], fch[3], int_default);
	}
    }
  else if ((dr_method == PDR_FORWARD) || (dr_method == PDR_BACKWARD) || 
	   (dr_method == PDR_PLUS_L_MINUS_R) || (dr_method == PDR_BB) || 
	   (dr_method == PDR_FLOAT_FORWARD))
    {
      fch = malloc(PCP_N_SUB_FSCRIT*sizeof(int));
      int_default = PCP_FSUB_KNN;
      fch[len++] = PCP_FSUB_KNN;
      fch[len++] = PCP_FSUB_IN_IN;
      fch[len++] = PCP_FSUB_BAYES;
      sprintf(msg, PCP_UMSG_FSEL_3, fch[0], fch[1], fch[2], int_default);
    }
  fscrit = input_choice(stdin, outdev, msg, &int_default, fch, len);
  free(fch);
  free(msg);
  return fscrit;
}


/*
  Read dimension reduction parameters: method, dimension of
  transformed space, and feature subset evaluation criterion, for
  feature selection methods.

  The function returns the chosen method, and sets the dimension of
  transformed space in `idr' and the feature subset evaluation
  criterion in `fscrit'.

  The methods offered are based on number of vectors 'nv' and and
  input data dimensionality 'd'. The logic is:

                     available dimensionality reduction methods

  nv <= d            none, individual, SVD, EMAP
  nv > d             none, individual, PCA, FLD, EMAP

  The 'none' option is the default.
*/
int input_dr(FILE *outdev, int nv, int d, int c, int *idr, int *fscrit)
{
  int  int_default;
  int  min_range;
  int  max_range;
  int  dr_method;
  int  len;
  int  choice[50]; /* we don't expect more than 50 dimension reduction methods, ever */
  char *msg;
  
  int_default = PDR_NONE;
  *idr = d;
  dr_method = PDR_NONE;
  choice[0] = PDR_NONE;
  len = 1;
  msg = malloc((PCP_LONG_LEN+1)*sizeof(char));
  /*
    NOTE: the function will display at most PCP_QLEN characters.
   */
  if (nv <= d)
    {
      sprintf(msg, PCP_UMSG_DR_SM, PDR_NONE, PDR_SVD, PDR_EMAP, PDR_RANKING,
	      PDR_FORWARD, PDR_BACKWARD, int_default);
      choice[len++] = PDR_RANKING;
      choice[len++] = PDR_SVD;
      choice[len++] = PDR_EMAP;
      choice[len++] = PDR_FORWARD;
      choice[len++] = PDR_BACKWARD;
#if 0
      choice[len++] = PDR_PLUS_L_MINUS_R;
      choice[len++] = PDR_BB;
#endif
    }
  else if (nv > d)
    {
      sprintf(msg, PCP_UMSG_DR_LG, PDR_NONE, PDR_FISHER, PDR_PCA, PDR_EMAP, 
	      PDR_RANKING, PDR_FORWARD, PDR_BACKWARD, int_default);
      choice[len++] = PDR_RANKING;
      choice[len++] = PDR_PCA;
      choice[len++] = PDR_FISHER;
      choice[len++] = PDR_EMAP;
      choice[len++] = PDR_FORWARD;
      choice[len++] = PDR_BACKWARD;
#if 0
      choice[len++] = PDR_PLUS_L_MINUS_R;
      choice[len++] = PDR_BB;
#endif
    }
  dr_method = input_choice(stdin, outdev, msg, &int_default, choice, len);
  if (!dr_method)
    dr_method = PDR_NONE;
  else
    {
      /*
	In case the chosen dimensionality reduction method is a
	feature selection method, choose the feature subset evaluation
	criterion.
      */
      if (fscrit)
	{
	  if ((dr_method == PDR_RANKING) || (dr_method == PDR_FORWARD) ||
	      (dr_method == PDR_BACKWARD) || (dr_method == PDR_PLUS_L_MINUS_R) ||
	      (dr_method == PDR_BB) || (dr_method == PDR_FLOAT_FORWARD))
	    *fscrit = input_fscrit(outdev, dr_method, c);
	}
      /*
	Get dimension.
      */
      min_range = 1;
      if (dr_method == PDR_SVD)
	max_range = nv;
      else if (dr_method == PDR_FISHER)
	max_range = int_min(d, c-1);
      else
	max_range = d;
      int_default = max_range;
      sprintf(msg, PCP_UMSG_NDIM, min_range, max_range, int_default);
      *idr = input_integer(stdin, outdev, msg, PCP_QLEN, &int_default, &min_range,
			   &max_range);
    }
  free(msg);
  return dr_method;
}

/*
  Read feature selection method.
*/
int input_fsel(FILE *outdev)
{
  int  int_default;
  int  dr_method;
  int  len;
  int  choice[50]; /* we don't expect more than 50 feature selection methods, ever */
  char *msg;
  
  int_default = PDR_RANKING;
  dr_method = PDR_NONE;
  choice[0] = PDR_NONE;
  len = 1;
  msg = malloc((PCP_LONG_LEN+1)*sizeof(char));
  sprintf(msg, PCP_UMSG_FEATSEL, PDR_RANKING, PDR_FORWARD, PDR_BACKWARD, PDR_RANKING);
  choice[len++] = PDR_RANKING;
  choice[len++] = PDR_FORWARD;
  choice[len++] = PDR_BACKWARD;
#if 0
  choice[len++] = PDR_PLUS_L_MINUS_R;
  choice[len++] = PDR_BB;
#endif
  dr_method = input_choice(stdin, outdev, msg, &int_default, choice, len);
  if (!dr_method)
    dr_method = PDR_NONE;
  free(msg);
  return dr_method;
}

#define PAU_DISPNAME_LEN    16

/*
  Return index of next non-empty class.
*/
static int next_class(int tcl, int *nd, int c)
{
  int itcl;

  itcl = tcl;
  if (itcl < c-1)
    {
      itcl++; /* next class */
      /*
	If this test class is empty (which is perfectly legal), we
	have to skip to the next non-empty one.
      */
      while ((itcl <= c-1) && (nd[itcl] == 0))
	itcl++;
    }
  return itcl;
}

/*
  Write prediction for vector `idx' in `dset' to 'fptr' in PCP_RCL
  format. 

  TBD: `tset' is the training dataset. It is used to extract the
  actual class name for the input vector. This is brain-dead; it is a
  consequence of the current one-class-per-file input format. A better
  solution is to use actual class labels (dataset.label).
*/
void write_rcl(FILE *fptr, int idx, struct dataset *dset, struct dataset *tset)
{
  int  i;
  int  predicted_class;
  int  actual_class;
  int  fmt;
  char *name;
  char *tname;

  if (fptr)
    {
      predicted_class = dset->prediction[idx];
      name = bname(tset->fnames[predicted_class]);
      actual_class = dataset_class(idx, dset->c, dset->nd); /* actual class for vector idx */
      tname = bname(dset->fnames[actual_class]);
      fmt = dset->format;
      if ((fmt == DATASET_FF_VEC) || (fmt == DATASET_FF_COLVEC))
	fprintf(fptr, "%s\t%s\t%s\t", dset->xlab[idx], tname, name);
      else
	fprintf(fptr, "%d\t%s\t%s\t", idx+1, tname, name);
      /*
	The last column is `1' for correct prediction, `0' for
	incorrect.
      */
      if (predicted_class == actual_class)
	fprintf(fptr, "1");
      else
	fprintf(fptr, "0");
      if (dset->c == 2)
	{
	  /*
	    Additional four columns for two classes. The columns are 1
	    if the prediction is TP, FN, FP, TN, respectively, and 0
	    otherwise. Obviously, exactly one column will have a 1 :-)
	    And yes, it could be all be encoded with one column, but
	    the is more consistent with the other file formats in PCP.
	  */
	  fprintf(fptr, "\t");
	  if (actual_class == 0)
	    {
	      if (actual_class == predicted_class)
		fprintf(fptr, "1\t0\t0\t0"); /* TP */
	      else
		fprintf(fptr, "0\t1\t0\t0"); /* FN */
	    }
	  else
	    {
	      if (actual_class == predicted_class) 
		fprintf(fptr, "0\t0\t0\t1"); /* TN */
	      else
		fprintf(fptr, "0\t0\t1\t0"); /* FP */
	    }
	}
      /*
	Optionally store class prediction strengths.
      */
      if (dset->ps)
	{
	  fprintf(fptr, "\t");
	  for (i = 0; i < dset->c; i++)
	    fprintf(fptr, "%g\t", dset->ps[idx][i]);
	}
      fprintf(fptr, "\n");
      free(tname);
      free(name);
    }
}
  
/*
  Display classification results for 'test_set'. The prediction were
  obtained using 'algorithm'.

  The function employs the following logic to evaluate and display the
  predictions.

  If test_set->c > 1, we assume that the test_set class labels are the
  correct classifications of the test samples, and compute error rates
  correspondingly. The test_set file names are used as class labels.

  If test_set->c == 1, we interpret that to mean that the actual class
  labels for the test_set are not known, and therefore error rates
  cannot be calculated; hence the function just displays the (integer)
  class IDs.

  If actual class labels can be established using the above logic, and
  if 'verbose' is 1, the function displays class prediction for all
  vectors. If 'verbose' is 0, only misclassified vectors are
  displayed.

  If actual class labels are unknown (i.e., test_set->c == 1),
  'verbose' is ignored, and the function displays prediction for all
  vectors.
*/
void predict_disp(struct dataset *test_set, int verbose, int algorithm)
{
  int   i;
  int   tcl;
  int   icx;
  int   coffset;
  int   tmc;
  int   n_P;
  int   T_P;
  int   F_P;
  int   n_N;
  int   T_N;
  int   F_N;
  float e_P;
  float e_N;
  float mcf;
  float ppv;
  float npv;
  int   *mmc;
  int   *pmc;
  char  *tname;
  char  *disp_name;
  char  *name;
  
  inverse_video();
  printf(PCP_LINE);
  e_P = -1.0;
  e_N = -1.0;
  tmc = 0;
  name = (char *) 0;
  if (test_set->c > 1)
    {
      printf("|        Class         |  Actual/predicted card.  |        Error rate        |\n");
      printf(PCP_LINE);
      tcl = 0;
      coffset = test_set->nd[0];
      mmc = calloc(test_set->c, sizeof(int)); /* number of misclassified samples in class i */
      pmc = calloc(test_set->c, sizeof(int)); /* predicted cardinality of class i */
      for (i = 0; i < test_set->nv; i++)
	{
	  if (i == coffset)
	    {
	      tcl = next_class(tcl, test_set->nd, test_set->c);
	      coffset += test_set->nd[tcl];
	    }
	  icx = test_set->prediction[i];
	  if (icx != tcl) /* tcl is actual class of test vector i */
	    mmc[tcl]++;
	  pmc[icx]++;
	}
      tmc = ivec_sum(mmc, test_set->c); /* total number of misclassified samples */
      icx = ivec_sum(test_set->nd, test_set->c);
      i = 0;
      mcf = tmc;
      mcf = 100.0*mcf/icx;
      printf("|                      |       %6d/%-6d      |  %6.2f%% (%6d/%6d) |\n", 
	     test_set->nv, test_set->nv, mcf, tmc, icx);
      for (i = 0; i < test_set->c; i++)
	{
	  mcf = mmc[i];
	  if (test_set->nd[i] > 0)
	    mcf = mcf/test_set->nd[i];
	  else
	    mcf = 0.0;
	  if (test_set->c == 2)
	    {
	      if (i == 0)
		e_P = mcf;
	      else
		e_N = mcf;
	    }
	  mcf = 100.0*mcf;
	  tname = bname(test_set->fnames[i]);
	  disp_name = lau_str_lim(tname, PAU_DISPNAME_LEN);
	  printf("|%4d/%s |       %6d/%-6d      |  %6.2f%% (%6d/%6d) |\n", 
		 i+1, disp_name, test_set->nd[i], pmc[i], mcf, mmc[i], test_set->nd[i]);
	  free(disp_name);
	  free(tname);
	}
      /*
	Display additional performance measures if number of classes is 2.
      */
      if (test_set->c == 2)
	{
	  n_P = test_set->nd[0];
	  T_P = n_P-mmc[0];
	  F_P = mmc[1];
	  n_N = test_set->nd[1];
	  T_N = n_N-mmc[1];
	  F_N = mmc[0];
	  if (T_P+F_P > 0)
	    ppv = 100.0*T_P/(T_P+F_P);
	  else
	    ppv = 0.0;
	  if (T_N+F_N > 0)
	    npv = 100.0*T_N/(T_N+F_N);
	  else
	    npv = 0.0;
	  printf(PCP_LINE);
	  printf("| Sensitivity                                     |  %6.2f%% (%6d/%6d) |\n",
		 100.0*(1.0-e_P), T_P, n_P);
	  printf("| Specificity                                     |  %6.2f%% (%6d/%6d) |\n",
		 100.0*(1.0-e_N), T_N, n_N);
	  printf("| Positive Predictive Value                       |  %6.2f%% (%6d/%6d) |\n", 
		 ppv, T_P, T_P+F_P);
	  printf("| Negative Predictive Value                       |  %6.2f%% (%6d/%6d) |\n", 
		 npv, T_N, T_N+F_N);
	}
      printf(PCP_LINE);
    }
  if ((test_set->c == 1) || (tmc > 0) || verbose)
    {
      if (test_set->xlab)
	{
	  if (test_set->c > 1)
	    {
	      if ((algorithm == PALG_MLP) || (algorithm == PALG_BAG_MLP))
		printf("| Vector               | Actual class             | MLP prediction           |\n");
	      else if ((algorithm == PALG_SVM) || (algorithm == PALG_BAG_SVM))
		printf("| Vector               | Actual class             | SVM prediction           |\n");
	      else if ((algorithm == PALG_LIN) || (algorithm == PALG_BAG_LIN))
		printf("| Vector               | Actual class             | Lin. discriminant pred.  |\n");
	      else if ((algorithm == PALG_KNN) || (algorithm == PALG_BAG_KNN))
		printf("| Vector               | Actual class             | k-NN prediction          |\n");
	      else if ((algorithm == PALG_PLC) || (algorithm == PALG_BAG_PLC))
		printf("| Vector               | Actual class             | Parametric lin. pred.    |\n");
	      else if ((algorithm == PALG_PQC) || (algorithm == PALG_BAG_PQC))
		printf("| Vector               | Actual class             | Parametric quadr. pred.  |\n");
	    }
	  else
	    {
	      if ((algorithm == PALG_MLP) || (algorithm == PALG_BAG_MLP))
		printf("| Vector                         | MLP prediction                            |\n");
	      else if ((algorithm == PALG_SVM) || (algorithm == PALG_BAG_SVM))
		printf("| Vector                         | SVM prediction                            |\n");
	      else if ((algorithm == PALG_LIN) || (algorithm == PALG_BAG_LIN))
		printf("| Vector                         | Linear discriminant prediction            |\n");
	      else if ((algorithm == PALG_KNN) || (algorithm == PALG_BAG_KNN))
		printf("| Vector                         | k-NN prediction                           |\n");
	      else if ((algorithm == PALG_PLC) || (algorithm == PALG_BAG_PLC))
		printf("| Vector                         | Parametric linear prediction              |\n");
	      else if ((algorithm == PALG_PQC) || (algorithm == PALG_BAG_PQC))
		printf("| Vector                         | Parametric quadratic prediction           |\n");
	    }
	}
      else
	{
	  if (test_set->c > 1)
	    {
	      if ((algorithm == PALG_MLP) || (algorithm == PALG_BAG_MLP))
		printf("| Vector |          Actual  class          |         MLP  prediction         |\n");
	      else if ((algorithm == PALG_SVM) || (algorithm == PALG_BAG_SVM))
		printf("| Vector |          Actual  class          |         SVM  prediction         |\n");
	      else if ((algorithm == PALG_LIN) || (algorithm == PALG_BAG_LIN))
		printf("| Vector |          Actual  class          | Linear discriminant prediction  |\n");
	      else if ((algorithm == PALG_KNN) || (algorithm == PALG_BAG_KNN))
		printf("| Vector |          Actual  class          |         k-NN prediction         |\n");
	      else if ((algorithm == PALG_PLC) || (algorithm == PALG_BAG_PLC))
		printf("| Vector |          Actual  class          |  Parametric linear prediction   |\n");
	      else if ((algorithm == PALG_PQC) || (algorithm == PALG_BAG_PQC))
		printf("| Vector |          Actual  class          | Parametric quadratic prediction |\n");
	    }
	  else
	    {
	      if ((algorithm == PALG_MLP) || (algorithm == PALG_BAG_MLP))
		printf("| Vector |            File name            |         MLP  prediction         |\n");
	      else if ((algorithm == PALG_SVM) || (algorithm == PALG_BAG_SVM))
		printf("| Vector |            File name            |         SVM  prediction         |\n");
	      else if ((algorithm == PALG_LIN) || (algorithm == PALG_BAG_LIN))
		printf("| Vector |            File name            | Linear discriminant prediction  |\n");
	      else if ((algorithm == PALG_KNN) || (algorithm == PALG_BAG_KNN))
		printf("| Vector |            File name            |         k-NN prediction         |\n");
	      else if ((algorithm == PALG_PLC) || (algorithm == PALG_BAG_PLC))
		printf("| Vector |            File name            |  Parametric linear prediction   |\n");
	      else if ((algorithm == PALG_PQC) || (algorithm == PALG_BAG_PQC))
		printf("| Vector |            File name            | Parametric quadratic prediction |\n");
	    }
	}
      printf(PCP_LINE);
      tcl = 0;
      coffset = test_set->nd[0];
      for (i = 0; i < test_set->nv; i++)
	{
	  if ((test_set->c > 1) && (i == coffset))
	    {
	      tcl = next_class(tcl, test_set->nd, test_set->c);
	      coffset += test_set->nd[tcl];
	    }
	  tname = bname(test_set->fnames[tcl]);
	  icx = test_set->prediction[i];
	  if (icx == PCP_UNASSIGNED)
	    name = strdup(PCP_UNASSIGNED_NAME);
	  else if (test_set->c > 1)
	    name = bname(test_set->fnames[icx]);
	  if (test_set->xlab)
	    {
	      if (test_set->c == 1)
		printf("| %-30s | %41d |\n", test_set->xlab[i], icx+1);
	      else if ((verbose == 1) || (icx != tcl))
		printf("| %-20s | %-23s  | %-23s  |\n", test_set->xlab[i], tname, name);
	    }
	  else
	    {
	      if (test_set->c == 1)
		printf("| %6d | %-30s  | %30d  |\n", i+1, tname, icx+1);
	      else if ((verbose == 1) || (icx != tcl))
		printf("| %6d | %-30s  | %-30s  |\n", i+1, tname, name);
	    }
	  free(tname);
	  free(name);
	}
      printf(PCP_LINE);
    }
  reset_video();
  cursor_off();
}

/*
  Extract 'subset_id'-th 'training_set' and 'test_set' from
  'x'. 'test_set' is obtained by extracting vectors listed in 'sx'
  from 'x' (see xss() for structure of 'sx'). 'training_set' has the
  remaining vectors in 'x'.
*/
int extract_sets(float **x, int subset_id, int c, int *nd, int d, 
		 int **sx, int **lx, float ***training_set, float ***test_set, 
		 FILE *fdbg)
{
  int   status;
  int   i;
  int   j;
  int   itx;
  int   ntraining;
  int   ntest;
  int   nvec;
  int   idx;
  int   ivx;
  int   ixc;
  int   cx;
  int   isx;
  int   trpos;
  int   tepos;
  int   lxd;
  float **trset;
  float **teset;
  hash_t *xhash;

  status = 0;
  ntest = 0;
  teset = (float **) 0;
  for (i = 0; i < c; i++)
    ntest += lx[i][subset_id];
  nvec = ivec_sum(nd, c);
  ntraining = nvec-ntest;
  trset = fmx_alloc(ntraining, d);
  if (test_set)
    teset = fmx_alloc(ntest, d);
  ixc = 0; /* current position within 'x' array */
  cx = 0; /* cumulative cardinality of classes 0..i-1. */
  tepos = 0; /* current position within test set being extracted from 'x' */
  trpos = 0; /* current position within training set being extracted from 'x' */
  for (i = 0; i < c; i++)
    {
      /*
	Store all 'subset_id' vector indices into 'xhash'. The indices
	are positions within array 'x'.
      */
      xhash = hashCreate();
      isx = ivec_sum(lx[i], subset_id); /* position within 'sx' array */
      for (j = 0; j < lx[i][subset_id]; j++)
	{
	  idx = sx[i][isx++];
	  hashPutInt(xhash, idx, idx);
	}
      for (j = cx; j < cx+nd[i]; j++)
	{
	  ivx = hashGetInt(xhash, j);
	  if (fdbg != (FILE *) 0)
	    {
	      if (ivx == j)
		fprintf(fdbg, "extract_sets(); class %d; moving %d into teset[%d]; ",
			i, ixc, tepos);
	      else
		fprintf(fdbg, "extract_sets(); class %d; moving %d into trset[%d]; ",
			i, ixc, trpos);
	    }
	  if (ivx == j)
	    {
	      if (fdbg)
		{
		  fprintf(fdbg, "test set vector:\t");
		  lxd = d;
		  if (d > P_MAX_LOGD)
		    lxd = P_MAX_LOGD;
		  for (itx = 0; itx < lxd; itx++)
		    fprintf(fdbg, "%f ", x[ixc][itx]);
		  fprintf(fdbg, "\n");
		}
	      if (test_set)
		fvec_copy(teset[tepos++], x[ixc++], d);
	    }
	  else
	    {
	      if (fdbg)
		{
		  lxd = d;
		  if (d > P_MAX_LOGD)
		    lxd = P_MAX_LOGD;
		  fprintf(fdbg, "training set vector:\t");
		  for (itx = 0; itx < lxd; itx++)
		    fprintf(fdbg, "%f ", x[ixc][itx]);
		  fprintf(fdbg, "\n");
		}
	      fvec_copy(trset[trpos++], x[ixc++], d);
	    }
	}
      hashDelete(xhash);
      cx += nd[i];
    }
  *training_set = trset;
  if (test_set)
    *test_set = teset;
  return status;
}

static void log_tset(FILE *fdbg, char *msg, int c, int d, int *nd, float **tset)
{
  int lxd;
  int jx;
  int itx;
  int nvec;
  int ctx;
  int offset;

  offset = 0;
  lxd = d;
  if (d > P_MAX_LOGD)
    lxd = P_MAX_LOGD;
  nvec = ivec_sum(nd, c);
  if (nvec > 0)
    {
      if (msg != (char *) 0)
	fprintf(fdbg, "%s", msg);
      if (nd)
	offset = nd[0];
      ctx = 0;
      for (jx = 0; jx < nvec; jx++)
	{
	  if (jx == offset)
	    offset += nd[++ctx];
	  fprintf(fdbg, "%d\t", ctx+1);
	  for (itx = 0; itx < lxd; itx++)
	    fprintf(fdbg, "%f\t", tset[jx][itx]);
	  fprintf(fdbg, "\n");
	}
      fprintf(fdbg, "\n");
      fflush(fdbg);
    }
}

void log_tt(FILE *fdbg, char *msg, int c, int d, int *training_nd, 
	    int *test_nd, float **training_set, float **test_set)
{
  char *tmsg = (char *) 0;

  if (fdbg != (FILE *) 0)
    {
      if (msg != (char *) 0)
	{
	  tmsg = malloc(strlen(msg)+100);
	  sprintf(tmsg, "%s training set\n", msg);
	}
      log_tset(fdbg, tmsg, c, d, training_nd, training_set);
      if (msg != (char *) 0)
	sprintf(tmsg, "%s test set\n", msg);
      log_tset(fdbg, tmsg, c, d, test_nd, test_set);
      free(tmsg);
    }
}

/*
  Log cross-validation subset 'idx'.
*/
void log_ites(FILE *fdbg, int idx, int c, int nv, int **lxc, int **sxc)
{
  int jx;
  int ix;
  int mx;
  int sflg;
  int offset;
  int coffset;

  if (fdbg)
    {
      fprintf(fdbg, "nxval:\t%d\t", idx+1);
      coffset = 0;
      for (jx = 0; jx < c; jx++)
	{
	  offset = ivec_sum(lxc[jx], idx);
	  for (mx = coffset; mx < coffset+tds->nd[jx]; mx++)
	    {
	      sflg = 0;
	      for (ix = 0; (ix < lxc[jx][idx]) && !sflg; ix++)
		if (mx == sxc[jx][offset+ix])
		  sflg = 1;
	      fprintf(fdbg, "%d\t", sflg);
	    }
	  coffset += tds->nd[jx];
	}
      fprintf(fdbg, "\n");
      fprintf(fdbg, "nxval:\t%d\t", idx+1);
      for (jx = 0; jx < c; jx++)
	{
	  offset = ivec_sum(lxc[jx], idx);
	  for (ix = 0; ix < lxc[jx][idx]; ix++)
	    fprintf(fdbg, "%d\t", sxc[jx][offset+ix]);
	}
      fprintf(fdbg, "\n");
    }
}

/*
  Calculate and display eigenvalues and condition number of covariance
  matrices of training data set.
*/
void p_eigen(int *errc)
{
  int   i;
  int   status;
  int   offset;
  float **evlc;
  float **sigma;

  offset = 0;
  evlc = malloc(tds->c*sizeof(float *));
  for (i = 0; i < tds->c; i++)
    {
      sigma = cest(&tds->x[offset], tds->d, tds->nd[i], COVARIANCE_MATRIX);
      offset += tds->nd[i];
      /*
	Calculate eigenvalues.
      */
      status = eigsn(sigma, tds->d, &evlc[i], (float ***) 0, errc);
    }
  if (!*errc)
    {
      ddevl(tds->c, tds->d, evlc);
      pwait();
    }
}

/*
  Calculate inverse covariance matrices and determinants for TDS. In
  case of success, set 'errc' to 0, otherwise set to error code.
  Possible errors are <errno.h> codes for malloc()/calloc() functions,
  and LERR_SINGCOV (singular covariance matrix).
*/
void p_sigma(int *errc)
{
  *errc = 0;
  dataset_sigma(tds, errc);
}

/*
  Log error message in error file.
*/
static void log_error(int exit_code, char *msg)
{
  time_t log_time;
  char   time_buf[26];
  char   *str;
  FILE   *fptr;

  log_time = time(NULL);
  fptr = fopen(PCP_ERR, "a");
  ctime_r(&log_time, time_buf);
  str = strchr(time_buf, '\n');
  *str = '\0';
  if (fptr)
    {
      fprintf(fptr, "%s %4d  %s\n", time_buf, exit_code, msg);
      fclose(fptr);
    }
}

static void print_msg(int maxlen, char *msg, int log, int exit_code)
{
  int    i;
  int    ipad;

  ipad = maxlen-strlen(msg);
  for (i = 0; i < ipad/2; i++)
    putc(' ', stdout);
  printf("%s", msg);
  for (i = 0; i < ipad/2; i++)
    putc(' ', stdout);
  if (log)
    log_error(exit_code, msg);
}

struct pcp_stat *hashPutStat(hash_t *hashtable, int key, struct pcp_stat *value)
{
  int  status;
  char *keyptr;
  struct pcp_stat *valptr;
  struct pcp_stat *retval = (struct pcp_stat *) 0;

  keyptr = malloc(MAX_INT_LENGTH);
  sprintf(keyptr, "%d", key);
  if (hashtable && value)
    if (!hash_lookup(hashtable, keyptr))
      {
	valptr = malloc(sizeof(struct pcp_stat));
	if (valptr)
	  {
	    memcpy(valptr, value, sizeof(struct pcp_stat));
	    status = hash_alloc_insert(hashtable, keyptr, valptr);
	    if (status == 1)
	      retval = value;
	  }
      }
  return retval;
}

struct pcp_stat *hashGetStat(hash_t *hashtable, int key)
{
  char *keyptr;
  struct pcp_stat *value = (struct pcp_stat *) 0;
  hnode_t *hn;

  keyptr = malloc(MAX_INT_LENGTH);
  sprintf(keyptr, "%d", key);
  hn = hash_lookup(hashtable, keyptr);
  if (hn)
    value = (struct pcp_stat *) hnode_get(hn);
  free(keyptr);
  return value;
}

void pcp_stat_set(struct pcp_stat *pcp_status, int internal_code, int exit_code, char *msg)
{
  pcp_status->internal_code = internal_code;
  pcp_status->exit_code = exit_code;
  if (msg)
    pcp_status->msg = strdup(msg);
  else
    pcp_status->msg = (char *) 0;
}

/*
  Create hash table of PCP status and error codes. Each element in the
  table is a struct pcp_status, which has internal code, exit code,
  and msg corresponding to the status. The exit code is a translation
  of internal code into legal exit status values, which have to be in
  the range [1..255]. This is because UNIX programs return 'status &
  0377' - i.e., the lower byte of the status code passed to exit() -
  to the parent process; see exit(3) for more details.

  Note that this means we only have 127 codes available for PCP status
  (since [0..128] seem to be taken, at least in Linux). If we need
  more, we are doomed. Also, this function needs to be updated every
  time a new error code is introduced.
*/
hash_t *create_status_table(void)
{
  int    code;
  struct pcp_stat *pcp_status;
  hash_t *table;
  
  table = hashCreate();
  /*
    Do not translate native UNIX (errno) error codes, which we believe
    are in the range 1..127.
  */
  pcp_status = calloc(1, sizeof(struct pcp_stat));
  for (code = 0; code <= PMAX_ERRNO; code++)
    {
      pcp_stat_set(pcp_status, code, code, (char *) 0);
      hashPutStat(table, code, pcp_status);
    }
  pcp_stat_set(pcp_status, PERR_UNDEFINED_TDS, code++, PMSG_UNDEFINED_TDS);
  hashPutStat(table, PERR_UNDEFINED_TDS, pcp_status);

  pcp_stat_set(pcp_status, PERR_UNDEFINED_TEDS, code++, PMSG_UNDEFINED_TEDS);
  hashPutStat(table, PERR_UNDEFINED_TEDS, pcp_status);

  pcp_stat_set(pcp_status, PERR_UNDEFINED, code++, PMSG_UNDEFINED);
  hashPutStat(table, PERR_UNDEFINED, pcp_status);

  pcp_stat_set(pcp_status, PERR_TWO_CLASS, code++, PMSG_TWO_CLASS);
  hashPutStat(table, PERR_TWO_CLASS, pcp_status);

  pcp_stat_set(pcp_status, PERR_ONE_CLASS, code++, PMSG_ONE_CLASS);
  hashPutStat(table, PERR_ONE_CLASS, pcp_status);

  pcp_stat_set(pcp_status, PERR_ONE_SAMPLE, code++, PMSG_ONE_SAMPLE);
  hashPutStat(table, PERR_ONE_SAMPLE, pcp_status);

  pcp_stat_set(pcp_status, PERR_BAD_INPUT, code++, (char *) 0);
  hashPutStat(table, PERR_BAD_INPUT, pcp_status);

  pcp_stat_set(pcp_status, PERR_BAD_INPUT_FILE, code++, (char *) 0);
  hashPutStat(table, PERR_BAD_INPUT_FILE, pcp_status);

  pcp_stat_set(pcp_status, PERR_INC_DIM, code++, (char *) 0);
  hashPutStat(table, PERR_INC_DIM, pcp_status);

  pcp_stat_set(pcp_status, PERR_INCONSISTENT_MLP, code++, PMSG_INCONSISTENT_MLP);
  hashPutStat(table, PERR_INCONSISTENT_MLP, pcp_status);

  pcp_stat_set(pcp_status, PERR_UNRECOGNIZED_MLP, code++, PMSG_UNRECOGNIZED_MLP);
  hashPutStat(table, PERR_UNRECOGNIZED_MLP, pcp_status);

  pcp_stat_set(pcp_status, PERR_UNRECOGNIZED_SVM, code++, PMSG_UNRECOGNIZED_SVM);
  hashPutStat(table, PERR_UNRECOGNIZED_SVM, pcp_status);

  pcp_stat_set(pcp_status, PERR_INCONSISTENT_SVD, code++, PMSG_INCONSISTENT_SVD);
  hashPutStat(table, PERR_INCONSISTENT_SVD, pcp_status);

  pcp_stat_set(pcp_status, PERR_INCONSISTENT_FE, code++, PMSG_INCONSISTENT_FE);
  hashPutStat(table, PERR_INCONSISTENT_FE, pcp_status);

  pcp_stat_set(pcp_status, PERR_INCONSISTENT_MAP, code++, PMSG_INCONSISTENT_MAP);
  hashPutStat(table, PERR_INCONSISTENT_MAP, pcp_status);

  pcp_stat_set(pcp_status, PERR_INCONSISTENT_FILE, code++, PMSG_INCONSISTENT_FILE);
  hashPutStat(table, PERR_INCONSISTENT_FILE, pcp_status);

  pcp_stat_set(pcp_status, LERR_FILE_FORMAT, code++, LMSG_FILE_FORMAT);
  hashPutStat(table, LERR_FILE_FORMAT, pcp_status);

  pcp_stat_set(pcp_status, LERR_DESCENT, code++, LMSG_DESCENT);
  hashPutStat(table, LERR_DESCENT, pcp_status);

  pcp_stat_set(pcp_status, LERR_INTERNAL, code++, LMSG_INTERNAL);
  hashPutStat(table, LERR_INTERNAL, pcp_status);

  pcp_stat_set(pcp_status, LERR_INCONSISTENT_MODEL, code++, LMSG_INCONSISTENT_MODEL);
  hashPutStat(table, LERR_INCONSISTENT_MODEL, pcp_status);

  pcp_stat_set(pcp_status, LERR_INCONSISTENT_LIN, code++, LMSG_INCONSISTENT_LIN);
  hashPutStat(table, LERR_INCONSISTENT_LIN, pcp_status);

  pcp_stat_set(pcp_status, LERR_SINGCOV, code++, LMSG_SINGCOV);
  hashPutStat(table, LERR_SINGCOV, pcp_status);

  pcp_stat_set(pcp_status, LERR_SINGULAR, code++, LMSG_SINGULAR);
  hashPutStat(table, LERR_SINGULAR, pcp_status);

  pcp_stat_set(pcp_status, LERR_ITMAX, code++, LMSG_ITMAX);
  hashPutStat(table, LERR_ITMAX, pcp_status);

  pcp_stat_set(pcp_status, LERR_VAR_NCOL, code++, LMSG_VAR_NCOL);
  hashPutStat(table, LERR_VAR_NCOL, pcp_status);

  return table;
}

/*
  Print message in the designated area. The function has variable
  number of arguments. 'nargs' is the number of arguments following
  'nargs'. The following arguments are int (message code) and, if
  nargs is larger than 1, (char *).  The second argument is name
  (usually of a file causing error), if message code is non-zero. If
  message code is zero, the second argument is printed as is.

  TBD: change name. The name is bad since not all messages are
  necessarily error messages.
*/
void errmsg(int nargs, ...)
{
  int    i;
  int    code;
  int    ipad;
  int    maxlen;
  int    len;
  int    exit_code;
  char   *str;
  char   *pstr;
  char   *estr;
  char   *name;
  char   *tname;
  hash_t *status_table;
  struct pcp_stat *pcp_status;
  va_list VariableArgsPtr;

  code = 0;
  exit_code = 0;
  str = (char *) 0;
  tname = (char *) 0;
  va_start(VariableArgsPtr, nargs);
  for (i = 0; i < nargs; i++)
    {
      if (i == 0)
	code = va_arg(VariableArgsPtr, int);
      else if (i == 1)
	{
	  name = va_arg(VariableArgsPtr, char *);
	  if (name && *name)
	    {
	      tname = strdup(name);
	      str_trim(tname);
	    }
	}
    }
  /*
    'code' is PCP internal error code, 'exit_code' is the user-visible
    exit code. We need to store the exit_code in the error file.
   */
  if (code != 0)
    {
      status_table = create_status_table();
      pcp_status = hashGetStat(status_table, code);
      if (pcp_status)
	exit_code = pcp_status->exit_code;
      else
	exit_code = code;
      hashDelete(status_table);
    }
  maxlen = 73;
  cpos(3, 20);
  inverse_video();
  /*
    These error codes require special handling because they come with
    a file name.
  */
  if ((code == EACCES) || (code == ENOENT) || (code == PERR_UNRECOGNIZED_MLP) ||
      (code == PERR_UNRECOGNIZED_SVM) || (code == PERR_INCONSISTENT_MLP) ||
      (code == PERR_BAD_INPUT_FILE) || (code == PERR_INCONSISTENT_FILE) ||
      (code == PERR_ILLEGAL_RNK_FILE))
    {
      if (code == EACCES)
	{
	  if (tname)
	    str = PMSG_EACCES;
	  else
	    str = PMSG_FILE_ACCESS;
	}
      else if (code == ENOENT)
	str = PMSG_ENOENT;
      else if (code == PERR_UNRECOGNIZED_MLP)
	str = PMSG_UNRECOGNIZED_MLP;
      else if (code == PERR_UNRECOGNIZED_SVM)
	str = PMSG_UNRECOGNIZED_SVM;
      else if (code == PERR_INCONSISTENT_MLP)
	str = PMSG_INCONSISTENT_MLP;
      else if (code == PERR_BAD_INPUT_FILE)
	str = PMSG_BAD_INPUT_FILE;
      else if (code == PERR_INCONSISTENT_FILE)
	str = PMSG_INCONSISTENT_FILE;
      else if (code == PERR_ILLEGAL_RNK_FILE)
	str = PMSG_ILLEGAL_RNK_FILE;
      ipad = maxlen-strlen(str)-1;
      if (tname)
	ipad -= strlen(tname);
      for (i = 0; i < ipad/2; i++)
	putc(' ', stdout);
      len = strlen(str)+5;
      if (tname)
	len += strlen(tname);
      pstr = malloc(len*sizeof(char));
      if (tname)
	sprintf(pstr, "%s %s.", str, tname);
      else
	sprintf(pstr, "%s.", str);
      printf("%s", pstr);
      log_error(exit_code, pstr);
      free(pstr);
#if 0
      for (i = 0; i < 5; i++)
	putc(' ', stdout);
#endif
    }
  else if (code == PERR_UNDEFINED_TDS)
    print_msg(maxlen, PMSG_UNDEFINED_TDS, 1, exit_code);
  else if (code == PERR_UNDEFINED_TEDS)
    print_msg(maxlen, PMSG_UNDEFINED_TEDS, 1, exit_code);
  else if (code == PERR_UNDEFINED)
    print_msg(maxlen, PMSG_UNDEFINED, 1, exit_code);
  else if (code == PERR_INCONSISTENT_SVD)
    print_msg(maxlen, PMSG_INCONSISTENT_SVD, 1, exit_code);
  else if (code == PERR_INCONSISTENT_FE)
    print_msg(maxlen, PMSG_INCONSISTENT_FE, 1, exit_code);
  else if (code == PERR_INCONSISTENT_MAP)
    print_msg(maxlen, PMSG_INCONSISTENT_MAP, 1, exit_code);
  else if (code == PERR_TWO_CLASS)
    print_msg(maxlen, PMSG_TWO_CLASS, 1, exit_code);
  else if (code == PERR_ONE_CLASS)
    print_msg(maxlen, PMSG_ONE_CLASS, 1, exit_code);
  else if (code == PERR_ONE_SAMPLE)
    print_msg(maxlen, PMSG_ONE_SAMPLE, 1, exit_code);
  else if (code == LERR_FILE_FORMAT)
    print_msg(maxlen, LMSG_FILE_FORMAT, 1, exit_code);
  else if (code == LERR_DESCENT)
    print_msg(maxlen, LMSG_DESCENT, 1, exit_code);
  else if (code == LERR_LNSEARCH)
    print_msg(maxlen, LMSG_LNSEARCH, 1, exit_code);
  else if (code == LERR_SINGCOV)
    print_msg(maxlen, LMSG_SINGCOV, 1, exit_code);
  else if (code == LERR_SINGULAR)
    print_msg(maxlen, LMSG_SINGULAR, 1, exit_code);
  else if (code == LERR_INCONSISTENT_LIN)
    print_msg(maxlen, LMSG_INCONSISTENT_LIN, 1, exit_code);
  else if (code == LERR_INCONSISTENT_MODEL)
    print_msg(maxlen, LMSG_INCONSISTENT_MODEL, 1, exit_code);
  else if (code == LERR_VAR_NCOL)
    print_msg(maxlen, LMSG_VAR_NCOL, 1, exit_code);
  else if (code == PSTS_DONE)
    print_msg(maxlen, PMSG_DONE, 0, exit_code);
  else if (code == PSTS_INVCOV)
    print_msg(maxlen, PMSG_INVCOV, 0, exit_code);
  else if (code == PERR_BAD_INPUT)
    print_msg(maxlen, PMSG_BAD_INPUT, 1, exit_code);
  else if (code != 0)
    {
      if ((code > 0) && (code <= PMAX_ERRNO))
	{
	  estr = strerror(code);
	  pstr = strdup(estr);
	  ipad = maxlen-strlen(pstr);
	}
      else
	{
	  ipad = maxlen-strlen(PMSG_UNRECOGNIZED_ERROR)-7;
	  pstr = malloc((strlen(PMSG_UNRECOGNIZED_ERROR)+10)*sizeof(char));
	  sprintf(pstr, "%s%6d.", PMSG_UNRECOGNIZED_ERROR, code);
	}
      for (i = 0; i < ipad/2; i++)
	putc(' ', stdout);
      printf("%s", pstr);
      log_error(code, pstr);
      free(pstr);
    }
  else if ((code == 0) && (nargs >= 2) && tname)
    {
      /*
	If error code is 0, and there is a message argument, simply
	display it.
       */
      ipad = maxlen-strlen(tname);
      for (i = 0; i < ipad/2; i++)
	putc(' ', stdout);
      printf("%s", tname);
      for (i = 0; i < ipad-ipad/2; i++)
	putc(' ', stdout);
    }
  fflush(stdout);
  reset_video();
  free(tname);
}

/*
  Classify test data set using linear classifier 'method' in a
  file. 'method' can be PALG_LIN/PALG_BAG_LIN for linear discriminant
  classifier, or PALG_PLC/PALG_BAG_PLC for parametric linear
  classifier. The function obtains input parameters from the user,
  calls lin_predict(), and displays classification results.
  
  In case of success, return 0. In case of failure, return error
  code. In case of file access error, return the relevant file name in
  'xname'.
*/
int p_lin_predict(int method, char **xname)
{
  int   i;
  int   j;
  int   type;
  int   errc;
  int   verbose;
  int   min_range;
  int   max_range;
  int   predicted_class;
  int   nmodels;
  int   rows;
  int   columns;
  float *predictions;
  char  *fname;
  char  *output_fname;
  float *weights;
  float **model;
  float ***models;
  FILE  *outdev;
  FILE  *fptr;
  FILE  *output_fptr;

  clear_screen();
  outdev = stdout;
  errc = 0;
  cursor_on();
  if ((method == PALG_LIN) || (method == PALG_BAG_LIN))
    fname = input_filename(PCP_UMSG_LD_FNAME, PCP_LIN, outdev);
  else
    fname = input_filename(PCP_UMSG_PLC_FNAME, PCP_PLC, outdev);
  fptr = fopen(fname, "r");
  if (fptr)
    {
      fclose(fptr);
      output_fname = strdup(PCP_RCL);
      output_fptr = fopen(output_fname, "w");
      if (output_fptr)
	{
	  models = lin_load_models(fname, &type, &nmodels, &weights, &rows, &columns, &errc);
	  if (models)
	    {
	      /*
		Check consistency between the models and test data set.
	      */
	      if (((teds->c > 1) && (teds->c != rows)) || (teds->d+1 != columns))
		errc = LERR_INCONSISTENT_LIN;
	      if (errc == 0)
		{
		  predictions = calloc(rows, sizeof(float));
		  teds->prediction = malloc(teds->nv*sizeof(int));
		  for (i = 0; i < teds->nv; i++)
		    {
		      fvec_set(predictions, rows, 0.0);
		      for (j = 0; j < nmodels; j++)
			{
			  model = models[j];
			  predicted_class = lin_predict(model, rows, teds->d+1, teds->x[i]);
			  predictions[predicted_class] += weights[j];
			}
		      predicted_class = fvec_argmax(predictions, rows);
		      teds->prediction[i] = predicted_class;
		      write_rcl(output_fptr, i, teds, tds);
		    }
		  free(predictions);
#if 0
		  for (j = 0; j < nmodels; j++)
		    svm_destroy_model(models[j]);
#endif
		  free(models);
		  fclose(output_fptr);
		  min_range = 0;
		  max_range = 1;
		  verbose = input_integer(stdin, outdev, OUTPUT_MSG, PCP_QLEN, 
					  &min_range, &min_range, &max_range);
		  /*
		    Display results.
		  */
		  predict_disp(teds, verbose, method);
		  pwait();
		}
	    }
	  else
	    {
	      *xname = fname;
	      fclose(output_fptr);
	      unlink(output_fname);
	    }
	}
      else
	{
	  errc = errno;
	  *xname = output_fname;
	}
    }
  else
    {
      errc = errno;
      *xname = fname;
    }
  reset_video();
  return errc;
}

/*
  Create a temporary file in the current directory and return the
  name. The file is created with permissions -rw-r--r--.

  Return (char *) 0 in case of error and set errno. The errors are
  malloc(), mkstemp() and fchmod() errors.
*/
char *tempfile(void)
{
  int  status = 0;
  int  fd;
  char *template;

  template = malloc(20*sizeof(char));
  if (template != (char *) 0)
    {
      sprintf(template, "pcp_XXXXXX");
      fd = mkstemp(template);
      if (fd != -1)
	{
	  status = fchmod(fd, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
	  if (status == 0)
	    status = close(fd);
	}
      else
	status = -1;
      if (status == -1)
	{
	  free(template);
	  template = (char *) 0;
	}
    }
  return template;
}

/*
  Insert underscore 'iffix' into 'fnames'. For example, 'iris1.dat'
  becomes 'iris1_nrm.dat'.
*/
static void rename_files(char **fnames, int length, char *iffix)
{
  int  j;
  int  pos;
  char *xtr;
  char *fname;

  if (fnames && (length > 0) && iffix)
    {
      for (j = 0; j < length; j++)
	{
	  xtr = base_name(fnames[j]);
	  if (xtr)
	    {
	      pos = strcspn(xtr, ".");
	      fname = str_insert(4, xtr, pos, "_", iffix);
	      free(xtr);
	      fnames[j] = fname;
	    }
	}
    }
}

/*
  Set 'dset1' and 'dset2' to the deep or shallow copies of TEDS and
  TDS, respectively, depending on user input. If the copies are deep,
  the current data sets will get replaced by the result of the
  subsequent operations; it the copies are shallow, new data sets will
  be created and saved to disk. 'iffix' is used to define the file
  names for the saved data sets.

  Return the mode (replace data sets -> 1, do not replace -> 0).
*/
int input_replace(struct dataset **dset1, struct dataset **dset2, char *iffix)
{
  int   do_not_replace;
  int   replace;
  int   mode;
  char  *msg;

  do_not_replace = P_DO_NOT_REPLACE;
  replace = P_REPLACE;
  msg = malloc((PCP_QLEN+1)*sizeof(char));
  sprintf(msg, PCP_UMSG_REPLACE, replace, do_not_replace, do_not_replace);
  mode = input_integer(stdin, stdout, msg, PCP_QLEN, &do_not_replace, &do_not_replace,
		       &replace);
  free(msg);
  if (mode == P_REPLACE)
    *dset1 = teds;
  else 
    *dset1 = dataset_clone(teds);
  if (mode == P_REPLACE)
    *dset2 = tds;
  else
    *dset2 = dataset_clone(tds);
  if (dset1 && *dset1)
    rename_files((*dset1)->fnames, (*dset1)->c, iffix);
  rename_files((*dset2)->fnames, (*dset2)->c, iffix);
  return mode;
}

/*
  Utility function for feature extraction and selection. Saves
  'dset1', 'dset2'. If 'mode' is P_REPLACE, updates the status file.

  In case of error, return -1 and set 'errc'. In case of file error,
  set file name in 'xname'.
*/
int save_datasets(struct dataset *dset1, struct dataset *dset2, int mode, 
		  int *errc, char **xname)
{
  int  status;

  *errc = 0;
  status = dataset_write(dset1, xname);
  if (!status)
    status = dataset_write(dset2, xname);
  if ((mode == P_REPLACE) && !status)
    {
      status = save_sts(PCP_STS, teds, tds);
      if (status == -1)
	{
	  *errc = errno;
	  *xname = strdup(PCP_STS);
	  remove_datasets();
	}
    }
  if ((status == -1) && (*errc == 0))
    *errc = errno;
  return status;
}

/*
  Save the clusters defined by 'ncl' (number of clusters), 'ccard'
  (cluster cardinalities), 'clist' (list of vectors in the clusters),
  using file 'format'.

  If 'format' is Named Vector format (DATASET_FF_VECTOR), and TDS is
  in the Raw format (DATASET_FF_RAW) , we store vector IDs instead of
  the (non-existent) vector names.

  In case of error, set 'errc' and set file name in 'xname' (the only
  possible errors are file errors).
*/
void save_clusters(int ncl, int *ccard, int *clist, int format, int *errc, char **xname)
{
  int  i;
  int  j;
  int  k;
  int  status;
  int  idx;
  char *fname;
  FILE *fptr;
  
  status = 0;
  fname = (char *) 0;
  for (i = 0; (i < ncl) && !status; i++)
    {
      fname = malloc((strlen(PCP_PCP)+strlen(PCP_CLX)+MAX_INT_DIGITS+1)*sizeof(char));
      sprintf(fname, "%s%d%s", PCP_PCP, i+1, PCP_CLX);
      fptr = fopen(fname, "w");
      if (fptr)
	{
	  for (j = 0; j < ccard[i]; j++)
	    {
	      idx = clist[j];
	      if (format == DATASET_FF_VEC) 
		{
		  if (tds->format == DATASET_FF_VEC) 
		    status = fprintf(fptr, "%s\t", tds->xlab[idx]);
		  else if (tds->format == DATASET_FF_RAW)
		    status = fprintf(fptr, "%d\t", idx);
		}
	      for (k = 0; k < tds->d; k++)
		status = fprintf(fptr, "%g\t", tds->x[idx][k]);
	      fprintf(fptr, "\n");
	    }
	  free(fname);
	  status = fclose(fptr);
	}
      else
	status = -1;
    }
  if (status == -1)
    {
      *errc = errno;
      *xname = strdup(fname);
    }
}

/*
  A convenient memory allocator for simplex()
  optimization. Initializes the criterion vector `fval' and simplex
  `smx'.
*/
int smplx_alloc(float **fval, float ***smx, int n)
{
  int   i;
  int   status;
  float **mx;

  status = 0;
  mx = malloc((n+1)*sizeof(float *));
  if (mx)
    {
      for (i = 0; (i < n+1) && !status; i++)
	{
	  mx[i] = malloc(n*sizeof(float));
	  if (!mx[i])
	    status = -1;
	}
      if (!status)
	{
	  *fval = malloc((n+1)*sizeof(float));
	  if (!*fval)
	    status = -1;
	  else
	    *smx = mx;
	}
    }
  else
    status = -1;
  return status;
}

/*
  Accept input parameters and pass them to the corresponding ensemble
  (bagging or boosting) learning function.  

  In case of error, set 'errc'. If error is file access error, set
  'xname'.
*/
void pcp_ensemble(int method, int *errc, char **xname, int dbg)
{
  int   status;
  int   opt_method;
  int   ensemble_method; /* the ensemble variant of `method'; for example,        */
                         /* for PALG_SVM, the possible values for ensemble_method */
                         /* are PALG_BAG_SVM and PALG_ADABOOST_SVM                */
  float **t;
  int   mlp_nlayers;  
  int   *mlp_npl; 
  int   mlp_itmax;
  float mlp_range;
  float eta;
  float mu;
  unsigned int seed;
  char  *fname;
  int   nmodels;
  int   bag_size;
  float *weights;
  void  **models;
  struct mlp_options *mlp_optional;
  struct svm_parameter *parameters;
  void  *options = (void **) 0;
  void  **problems;
  FILE  *outdev;
  FILE  *fdbg = (FILE *) 0;

  fname = (char *) 0;
  models = (void **) 0;
  if (tds)
    {
      status = 0;
      /*
	TBD: decided to fix bag_size to 'nvec' in November
	2002. Perhaps revisit.
      */
      bag_size = tds->nv;
      outdev = stdout;
      fflush(outdev);
      if (dbg > 0)
	fdbg = fopen(PCP_DBG, "a");
      clear_screen();
      cursor_on();
      if (method == PALG_BAG_MLP)
	{
	  t = mlp_target(tds->c, tds->nd);
	  status = input_mlp(outdev, tds->d, tds->c, tds->nv, (int *) 0, &mlp_nlayers,
			     &mlp_npl, &mlp_itmax, &mlp_range, &opt_method, &eta, &mu,
			     (int *) 0, 0, (int *) 0, &seed, &fname);
	  if (status == 0)
	    {
	      mlp_optional = calloc(1, sizeof(struct mlp_options));
	      mlp_optional->nlayers = mlp_nlayers;
	      mlp_optional->npl = mlp_npl;
	      mlp_optional->itmax = mlp_itmax;
	      mlp_optional->range = mlp_range;
	      mlp_optional->opt_method = opt_method;
	      mlp_optional->eta = eta;
	      options = mlp_optional;
	    }
	}
      else if (method == PALG_SVM)
	{
	  parameters = calloc(1, sizeof(struct svm_parameter));
	  options = parameters;
	  status = input_svm(outdev, tds->c, tds->nv, (struct svm_problem *) 0,
			     parameters, &fname, (int *) 0, 0, 1, PCP_SVM_K_NONE, 
			     &ensemble_method);
	  seed = input_seed(stdin, outdev);
	}
      else if ((method == PALG_BAG_LIN) || (method == PALG_BAG_PLC))
	{
	  clear_screen();
	  cursor_on();
	  if (method == PALG_BAG_LIN)
	    fname = input_filename(LD_MSG, PCP_LIN, outdev);
	  else
	    fname = input_filename(CLASSIFIER_MSG, PCP_PLC, outdev);
	  seed = input_seed(stdin, outdev);
	}
      else if (method == PALG_BAG_PQC)
	{
	  clear_screen();
	  cursor_on();
	  fname = input_filename(CLASSIFIER_MSG, PCP_PQC, outdev);
	  seed = input_seed(stdin, outdev);
	}
      if (status == 0)
	{
	  nmodels = input_nmodels(stdin, outdev);
	  inverse_video();
	  srand(seed);
	  if (method == PALG_SVM)
	    {
	      if (ensemble_method == PALG_BAG_SVM)
		models = (void **) bagging(tds, outdev, ensemble_method, fname, seed, nmodels, 
					   bag_size, options, &problems, errc, fdbg);
	      else if (ensemble_method == PALG_ADABOOST_SVM)
		models = adaboost(tds, ensemble_method, &nmodels, &weights, fname, seed, options, 
				  errc, fdbg);
	    }
	  else
	    models = (void **) bagging(tds, outdev, method, fname, seed, nmodels, 
				       bag_size, options, (void ***) 0, errc, fdbg);
	  reset_video();
	}
      if (!models)
	*xname = strdup(fname);
    }
  else
    *errc = PERR_UNDEFINED_TDS;
}

/*
  Combine matrices `x1' and `x2' with `n1' and `n2' rows,
  respectively. The combination is an array of pointers pointing to
  rows of x1 followed by rows of x2.
  
  In case of malloc() failure, return (float **) 0 and set errno.
*/
float **combine_x(float **x1, int n1, float **x2, int n2)
{
  int   i;
  int   idx;
  int   nsamples;
  float **cx;
  
  nsamples = n1+n2;
  cx = malloc(nsamples*sizeof(float *));
  if (cx)
    {
      for (i = 0; i < n1; i++)
	cx[i] = x1[i];
      idx = n1;
      for (i = 0; i < n2; i++)
	cx[idx++] = x2[i];
    }
  return cx;
}


syntax highlighted by Code2HTML, v. 0.9.1