/*
  File name: mlp.c
  Created by: Ljubomir Buturovic
  Created: 03/11/2001
  Purpose: multi-layer perceptron (MLP) learning and prediction.
*/

/*
  Copyright 2004 Ljubomir J. Buturovic

  Permission is hereby granted, free of charge, to any person
  obtaining a copy of this software and associated documentation files
  (the "Software"), to deal in the Software without restriction,
  including without limitation the rights to use, copy, modify, merge,
  publish, distribute, sublicense, and/or sell copies of the Software,
  and to permit persons to whom the Software is furnished to do so,
  subject to the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
*/

/*
  The principal function in this file is mlp_learn(). It trains a MLP
  network using various optimization methods, including conjugate
  gradient and gradient descent.

  p_mlp_learn() serves as an interface between the menu system and
  mlp_learn().

  The MLP algorithms and variable names in mlp_learn() largely follow
  notation and theory given in: Christopher M. Bishop, Neural Networks
  for Pattern Recognition, Chapter 4. Oxford University Press, Oxford,
  1995.
*/

static char rcsid[] = "$Id: mlp.c,v 1.201 2005/09/03 09:03:04 ljubomir Exp $";

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <errno.h>
#include "mlp.h"
#include "pau.h"
#include "lau.h"
#include "lmat.h"
#include "conjg.h"
#include "simplex.h"
#include "lerr.h"
#include "bagging.h"

/*
  The structure used to pass parameters to mlp_function() through
  wn_conj_gradient_method(). 
*/
struct mlp_parameters
{
  /*
    'int iteration' has to be the first member of the structure. MLP
    optimization function sets it to the current iteration.
  */
  int   iteration; 
  float **t;
  int   errc;
  struct mlp *perceptron;
  struct dataset *dset;
  FILE  *outdev;
  FILE  *fdbg;
};

static float sigmoid(float a)
{
  float g;

  g = 1.0/(1.0+exp(-a));
  return g;
}

/*
  Calculate node inputs 'a' and activations 'z' for inputs 'x' and
  network given in 'perceptron'.
*/
static void calculate_network(struct mlp *perceptron, float *x)
{
  int nodes;
  int i;
  int j;
  int d;
  int c;
  int layer;
  int nlayers;
  int cix;
  int cl;
  int iw;
  int   *npl;
  float *w;
  float *a;
  float *z;
  float *delta;

  w = perceptron->w;
  nodes = perceptron->nodes;
  nlayers = perceptron->nlayers;
  npl = perceptron->npl;
  d = perceptron->d;
  c = perceptron->npl[nlayers-1];
  a = perceptron->a;
  z = perceptron->z;
  delta = perceptron->delta;
  /* 
     'cix' is index of first node in previous layer. Current node
     takes activations from nodes cix, cix+1, ... cix+npl[layer-1].
  */
  cix = 0;
  iw = 0;
  layer = 0;
  cl = npl[0]; /* cumulative number of nodes up to the current layer */
  for (j = 0; j < nodes; j++)
    {
      if (j == cl)
	{
	  layer++;
	  cl += npl[layer];
	  if (layer >= 2)
	    cix += npl[layer-2]; 
	}
      a[j] = w[iw++]; /* bias term */
      if (layer == 0)
	for (i = 0; i < d; i++)
	  a[j] += w[iw++]*x[i];
      else
	{
	  for (i = 0; i < npl[layer-1]; i++)
	    a[j] += w[iw++]*z[cix+i];
	}
      z[j] = sigmoid(a[j]);
    }
}

/*
  Compute sample error, defined as sum of squares of differences
  between output and target values for each of the 'c' outputs.
*/
static double output_error(float *output, float *target, int c)
{
  int    i;
  double deviation;
  double dev;

  dev = 0.0;
  for (i = 0; i < c; i++)
    {
      deviation = output[i]-target[i];
      dev += deviation*deviation;
    }
  return dev;
}

/*
  Test functions.
*/

/*
  Test function. It calculates the value of error function for the
  perceptron described by 'perceptron' for a single sample 'x' and
  target value 't'.
*/
static double mlp_check_criterion(struct mlp *perceptron, float *x, float *t)
{
  int    cix;
  int    c;
  int    nlayers;
  double dev;
  int    *npl;
  float  *z;

  nlayers = perceptron->nlayers;
  npl = perceptron->npl;
  c = perceptron->npl[nlayers-1];
  z = perceptron->z;
  cix = ivec_sum(npl, nlayers-1); /* cix is index of first output node */
/*
  Calculate network outputs for sample 'x'.
*/
  calculate_network(perceptron, x);
/*
  Calculate output error.
*/
  dev = output_error(&z[cix], t, c);
  dev = 0.5*dev;
  return dev;
}

/*
  Test function. It calculates gradient 'gw' numerically at 'pw'. The
  function has signature and semantics identical to mlp_gradient().
*/
static void mlp_check_gradient(double *gw, double *pw, void *parameters)
{
  int   i;
  int   n;
  int   wlen;
  int   nsamples;
  float epsilon;
  float depsilon;
  float error_plus;
  float error_minus;
  float factor;
  float wx;
  float w;
  float **t;
  float **x;
  struct mlp_parameters *mlp_params;
  struct mlp *perceptron;
  FILE   *fdbg;

  mlp_params = (struct mlp_parameters *) parameters;
  perceptron = mlp_params->perceptron;
  fdbg = mlp_params->fdbg;
  nsamples = mlp_params->dset->nv;
  t = mlp_params->t;
  x = mlp_params->dset->x;
  wlen = perceptron->wlen;
  for (i = 0; i < wlen; i++)
    {
      gw[i] = 0.0;
      perceptron->w[i] = pw[i];
    }
  factor = 0.001;
  epsilon = factor;
/*
  Calculate approximate gradient by perturbing each weight in
  turn. Use formula (4.47) from Bishop.
*/
  for (n = 0; n < nsamples; n++)
    for (i = 0; i < wlen; i++)
      {
	wx = perceptron->w[i];
	/*epsilon = wx*factor;*/
	depsilon = epsilon+epsilon;
	w = wx+epsilon;
	perceptron->w[i] = w;
	error_plus = mlp_check_criterion(perceptron, x[n], t[n]);
	w -= depsilon;
	perceptron->w[i] = w;
	error_minus = mlp_check_criterion(perceptron, x[n], t[n]);
	gw[i] += (error_plus-error_minus)/depsilon;
	perceptron->w[i] = wx;
      }
  if (fdbg)
    for (i = 0; i < wlen; i++)
      fprintf(fdbg, "mlp_check_gradient(); leaving; perceptron->w[%d]: %f; "
	      "pw[%d]: %f; gw[%d]: %f\n", i, perceptron->w[i], i, pw[i], i, gw[i]);
}

/*
  Free mlp struct. Returns (struct mlp *) 0.
*/
struct mlp *mlp_free(struct mlp *perceptron)
{
  if (perceptron)
    {
      free(perceptron->npl);
      free(perceptron->w);
      free(perceptron->a);
      free(perceptron->z);
      free(perceptron->delta);
      free(perceptron->fname);
      free(perceptron);
    }
  return (struct mlp *) 0;
}

/*
  Return deep copy of 'perceptron'.
*/
struct mlp *mlp_clone(struct mlp *perceptron)
{
  int i;
  struct mlp *clone = (struct mlp *) 0;

  clone = calloc(1, sizeof(struct mlp));
  if (clone != (struct mlp *) 0)
    {
      clone->d = perceptron->d;
      clone->nlayers = perceptron->nlayers;
      clone->npl = malloc(perceptron->nlayers*sizeof(int));
      for (i = 0; i < perceptron->nlayers; i++)
	clone->npl[i] = perceptron->npl[i];
      clone->wlen = perceptron->wlen;
      clone->w = malloc(perceptron->wlen*sizeof(float));
      fvec_copy(clone->w, perceptron->w, perceptron->wlen);
      clone->nodes = perceptron->nodes;
      clone->a = malloc(perceptron->nodes*sizeof(float));
      fvec_copy(clone->a, perceptron->a, perceptron->nodes);
      clone->z = malloc(perceptron->nodes*sizeof(float));
      fvec_copy(clone->z, perceptron->z, perceptron->nodes);
      clone->delta = malloc(perceptron->nodes*sizeof(float));
      fvec_copy(clone->delta, perceptron->delta, perceptron->nodes);
      clone->noff = perceptron->noff;
      clone->seed = perceptron->seed;
      clone->range = perceptron->range;
      clone->iterations = perceptron->iterations;
      clone->p_iter = perceptron->p_iter;
      clone->itmax = perceptron->itmax;
      clone->error = perceptron->error;
      clone->mce = perceptron->mce;
      if (perceptron->fname)
	clone->fname = strdup(perceptron->fname);
    }
  return clone;
}

/*
  Calculate derivative of sigmoid function, _given value of sigmoid_
  'g' (as opposed to being given input to sigmoid). This is obviously
  faster.
*/
static float dsigmoid(float g)
{
  float gprime;

  gprime = g*(1.0-g);
  return gprime;
}

/*
  Generate target outputs for MLP, given input categorized in 'nc'
  classes with 'nd[i]' samples in class i.

  In case of malloc() failure, return NULL and set errno.
*/
float **mlp_target(int nc, int *nd)
{
  int   idx;
  int   jdx;
  int   nsamples;
  int   cthd;
  int   current_class;
  float **t;

  nsamples = ivec_sum(nd, nc);
  t = fmx_alloc(nsamples, nc);
  if (t)
    {
      cthd = nd[0];
      current_class = 0;
      for (idx = 0; idx < nsamples; idx++)
	{
	  if (idx >= cthd)
	    cthd += nd[++current_class];
	  for (jdx = 0; jdx < nc; jdx++)
	    {
	      if (jdx == current_class)
		t[idx][jdx] = MLP_TARGET_HIGH;
	      else
		t[idx][jdx] = MLP_TARGET_LOW;
	    }
	}
    }
  return t;
}

/*
  Determine index of first (left-most) node within 'layer'.
*/
static int get_layer_index(int layer, int *npl)
{
  int i;
  int idx;

  idx = 0;
  for (i = 0; i < layer; i++)
    idx += npl[i];
  return idx;
}

/* 
   Determine index (within 'w' array) of weight from node 'j', which
   is in 'layer', to first (left-most) node in next layer. 'd' is
   number of inputs.
*/
static int get_weight_index(int j, int d, int layer, int *npl)
{
  int i;
  int iw;

  /*
    First, there are (d+1)*npl[0] weights before layer 0.
  */
  iw = (d+1)*npl[0];
  /*
    Then, previous hidden layers.
  */
  for (i = 0; i < layer; i++)
    iw += (npl[i]+1)*npl[i+1];
  /*
    Current layer.
  */
  iw += j-get_layer_index(layer, npl)+1;
  return iw;
}

/*
  Writer function for mlp_save(), mlp_write().
*/
static void mlp_fprintf(FILE *outfile, struct mlp *perceptron, int mode, int index, float weight)
{
  int   i;
  int   j;
  int   l;
  int   iw;
  int   inode;
  int   idx = 1;
  float wt = 1.0;
  float *w;

  if (mode == MLP_MODE_APPEND)
    {
      idx = index;
      wt = weight;
    }
  w = perceptron->w;
  fprintf(outfile, "combined model / combined weight:       %d / %f\n", idx, wt);
  fprintf(outfile, "iterations:                             %d\n", perceptron->p_iter+perceptron->iterations);
  if (perceptron->method == MLP_OPT_CGPLUS)
    fprintf(outfile, "optimization method:                    %s\n", MLP_STR_CGPLUS);
  else if (perceptron->method == MLP_OPT_GRADIENT_DESCENT)
    fprintf(outfile, "optimization method:                    %s\n", MLP_STR_GRADIENT_DESCENT); 
  else if (perceptron->method == MLP_OPT_WNLIB)
    fprintf(outfile, "optimization method:                    %s\n", MLP_STR_WNLIB);
  else
    fprintf(outfile, "optimization method:                    %s\n", MLP_STR_UNSPECIFIED);
  fprintf(outfile, "seed:                                   %d\n", perceptron->seed);
  fprintf(outfile, "range of initial weights:               %f\n", perceptron->range);
  fprintf(outfile, "misclassified samples:                  %d\n", perceptron->mce);
  fprintf(outfile, "avg. error per node and sample (MSE):   %f\n", perceptron->error);
  fprintf(outfile, "number of layers:                       %d\n", perceptron->nlayers);
  for (i = 0; i < perceptron->nlayers; i++)
    fprintf(outfile, "nodes in layer %d:                       %d\n", i, perceptron->npl[i]);
  fprintf(outfile, "number of inputs:                       %d\n", perceptron->d);
  fprintf(outfile, "number of weights:                      %d\n", perceptron->wlen);
  iw = 0;
  for (i = 0; i < perceptron->npl[0]; i++)
    {
      fprintf(outfile, "weights from inputs to node  %3d:       ", i);
      for (j = 0; j <= perceptron->d; j++)
	{
	  fprintf(outfile, "%f ", w[iw]);
	  iw++;
	}
      fprintf(outfile, "\n");
    }
  inode = 0;
  for (i = 0; i < perceptron->nlayers-1; i++)
    {
      inode += perceptron->npl[i];
      for (j = 0; j < perceptron->npl[i+1]; j++)
	{
	  fprintf(outfile, "weights from layer %1d to node %3d:       ", i, j+inode);
	  for (l = 0; l <= perceptron->npl[i]; l++)
	    {
	      fprintf(outfile, "%f ", w[iw]);
	      iw++;
	    }
	  fprintf(outfile, "\n");
	}
    }
  /*
    One final newline needed to make bagging MLP file more
    readable.
  */
  fprintf(outfile, "\n");
}

/*
  Save multi-layer perceptron model described by 'perceptron' in
  perceptron->fname. The model is written (mode == MLP_MODE_WRITE) or
  appended (mode == MLP_MODE_APPEND) to the perceptron->fname file.
  If mode == MLP_MODE_APPEND, prepend 'index'/'weight' to the model
  saved, otherwise prepend the default values (1/1.0).

  In case of success, return 0. In case of failure, return -1 and set
  errno.  
*/
int mlp_save(struct mlp *perceptron, int mode, int index, float weight)
{
  int   status;
  FILE  *outfile = (FILE *) 0;

  status = 0;
  if (perceptron->fname)
    {
      if (mode == MLP_MODE_WRITE)
	outfile = fopen(perceptron->fname, "w");
      else
	outfile = fopen(perceptron->fname, "a");
      if (outfile)
	{
	  mlp_fprintf(outfile, perceptron, mode, index, weight);
	  fclose(outfile);
	}
      else
	status = -1;
    }
  return status;
}

/*
  File-pointer-based version of mlp_save().

  In case of success, return 0. In case of failure, return -1 and set
  errno.  
*/
int mlp_write(FILE *fptr, struct mlp *perceptron, int mode, int index, float weight)
{
  int status;

  status = 0;
  if (fptr)
    {
      mlp_fprintf(fptr, perceptron, mode, index, weight);
      fflush(fptr);
    }
  else
    status = -1;
  return status;
}

/*
  The function calculates gradient 'gw' of the criterion function for
  MLP defined by 'parameters->perceptron'. The gradient is calculated
  at point 'pw'.

  Internally, the function copies 'pw' into
  perceptron->w. perceptron->w is then used in calculations.
*/
static void mlp_gradient(double *gw, double *pw, void *parameters)
{
  int i;
  int j;
  int k;
  int n;
  int cix;
  int hix;
  int wlen;
  int layer;
  int nlayers;
  int c;
  int d;
  int idx;
  int kdx;
  int shift;
  int iw;
  int *npl;
  float sum;
  float zk;
  float *z;
  float *delta;
  double *dwt; /* instance (single vector) derivatives */
  float *wx;
  float **t;
  float **x;
  struct mlp_parameters *mlp_params;
  struct mlp *perceptron;
  struct dataset *dset;
  FILE   *fdbg;

  mlp_params = (struct mlp_parameters *) parameters;
  perceptron = mlp_params->perceptron;
  t = mlp_params->t;
  dset = mlp_params->dset;
  x = dset->x;
  fdbg = mlp_params->fdbg;
  fvec_dcopy(perceptron->w, pw, perceptron->wlen);
  wx = perceptron->w;
  d = perceptron->d;
  nlayers = perceptron->nlayers;
  c = perceptron->npl[nlayers-1];
  npl = perceptron->npl;
  wlen = perceptron->wlen;
  delta = perceptron->delta;
  z = perceptron->z;
  dvec_set(gw, wlen, 0.0);
  dwt = malloc(wlen*sizeof(double));
  if (fdbg)
    for (i = 0; i < wlen; i++)
      fprintf(fdbg, "mlp_gradient(); perceptron->w[%d]: %12.5g\n", i, perceptron->w[i]);
  cix = ivec_sum(npl, nlayers-1);
  for (n = 0; n < dset->nv; n++)
    {
/*
  Calculate network for sample 'n'.
*/
      calculate_network(perceptron, dset->x[n]);
/*
  Calculate deltas.
  
  Output nodes:
*/
      for (k = cix; k < cix+c; k++)
	{
	  zk = z[k];
	  delta[k] = zk*(1.0-zk)*(zk-t[n][k-cix]);
	  if (fdbg)
	    fprintf(fdbg, "mlp_gradient(); vector %d; output z[%d]: %12.5g; delta[%d]: %12.5g\n", 
		    n, k, zk, k, delta[k]);
	}
/*
  Hidden nodes:
*/
      idx = cix; /* in the following loop, idx is index of first (left-most) node in the current layer */
      for (layer = nlayers-2; layer >= 0; layer--)
	{
	  kdx = idx; /* index of first node in layer+1 */
	  idx -= npl[layer];
	  for (j = idx; j < idx+npl[layer]; j++)
	    {
	      sum = 0.0;
	      shift = npl[layer]+1;
	      /* iw is index of weight from node j to first node in next layer */
	      iw = get_weight_index(j, d, layer, npl); 
	      for (k = kdx; k < kdx+npl[layer+1]; k++)
		{
		  sum += wx[iw]*delta[k];
		  iw += shift;
		}
	      delta[j] = dsigmoid(z[j])*sum;
	    }
	}
      /*
	All deltas done. Calculate derivatives for sample 'n'.

	First calculate derivatives with respect to weights before
	layer 0 (i.e., weights connecting network inputs to layer 0).
      */
      j = 0;
      i = 0;
      for (iw = 0; iw < (d+1)*npl[0]; iw++)
	{
	  if ((iw%(d+1)) == 0) /* bias weight */
	    dwt[iw] = delta[j];
	  else
	    dwt[iw] = delta[j]*x[n][i++]; /* non-bias weight */
	  if (i == d)
	    {
	      i = 0;
	      j++;
	    }
	}
      /*
	Hidden layer derivatives.
      */
      layer = 0;  /* 'from' layer */
      j = 0;      /* current 'from' node */
      k = npl[0]; /* current 'to' node */
      hix = 0;    /* left-most node in 'layer' */
      idx = 0;    /* total number of weights in current layer */
      for (iw = (d+1)*npl[0]; iw < wlen; iw++)
	{
	  if ((idx%(npl[layer]+1)) == 0) /* bias weight */
	    dwt[iw] = delta[k];
	  else
	    dwt[iw] = z[j++]*delta[k]; /* non-bias weight */
	  if (j == hix+npl[layer])
	    {
	      j = hix;
	      k++;
	    }
	  idx++;
	  if (idx == (npl[layer]+1)*npl[layer+1]) /* next layer? */
	    {
	      hix += npl[layer];
	      j = hix;
	      layer++;
	      idx = 0;
	    }
	}
/*
  Update total derivatives.
*/
      for (iw = 0; iw < wlen; iw++)
	gw[iw] += dwt[iw];
    }
#ifdef TESTING
  /*
    If the file is compiled with -DTESTING, true and approximate
    gradient values are printed. The values should be very close;
    examples (gw: actual gradient; dwt: approximate gradient):

    gw[0]: -0.127655; dwt[0]: -0.127599
    gw[1]: -0.939811; dwt[1]: -0.939935
    gw[2]: -0.218290; dwt[2]: -0.218108
    gw[3]: -1.097668; dwt[3]: -1.097396
  */
  mlp_check_gradient(dwt, pw, parameters);
  fprintf(stdout, "perceptron->w: weights; pw: point; gw: gradient; dwt: app. gradient\n");
  for (iw = 0; iw < wlen; iw++)
    fprintf(stdout, "perceptron->w[%d]: %f; pw[%d]: %f; gw[%d]: %f; dwt[%d]: %f\n",
	    iw, perceptron->w[iw], iw, pw[iw], iw, gw[iw], iw, dwt[iw]);
  pwait();
#endif
  if (fdbg)
    for (iw = 0; iw < wlen; iw++)
      fprintf(fdbg, "mlp_gradient(); leaving; perceptron->w[%d]: %f; pw[%d]: %f; "
	      "gw[%d]: %f; dwt[%d]: %f\n",
	      iw, perceptron->w[iw], iw, pw[iw], iw, gw[iw], iw, dwt[iw]);
  free(dwt);
}

static void mlp_dgradient(double *pw, int n, double *gw, void *parameters, int *errc)
{
  mlp_gradient(gw, pw, parameters);
}

/*
  Calculate outputs for MLP defined in 'perceptron' for a given input
  'x'. The function allocates space for the returned vector - it is
  the caller's responsiblity to free() it.
*/
float *mlp_output(struct mlp *perceptron, float *x)
{
  int   i;
  int   nlayers;
  int   *npl;
  int   c;
  int   cix;
  int   iz;
  float *z;
  float *output;
  
  nlayers = perceptron->nlayers;
  npl = perceptron->npl;
  c = perceptron->npl[nlayers-1];
  output = malloc(c*sizeof(float));
  z = perceptron->z;
  cix = ivec_sum(npl, nlayers-1); /* cix is index of first output node */
  calculate_network(perceptron, x);
  iz = cix;
  for (i = 0; i < c; i++)
    output[i] = z[iz++];
  return output;
}

/*
  Testing function. Calculates derivatives dw[1..wlen]
  numerically. The function has signature and semantics identical to
  mlp_derivative().
*/
static void mlp_check_derivative(float *wt, float *dw, struct mlp *perceptron, float **x,
				 int nsamples, float **t, FILE *outdev, FILE *fdbg)
{
  int   i;
  int   n;
  int   wlen;
  float epsilon;
  float depsilon;
  float error_plus;
  float error_minus;
  float factor;
  float wx;
  float w;

  factor = 0.001;
  wlen = perceptron->wlen;
  for (i = 0; i < wlen; i++)
    perceptron->w[i] = wt[i+1];
/*
  This loop used for gdb debugging only.
  error_plus = 0;
  for (n = 0; n < nsamples; n++)
    error_plus += mlp_check_criterion(perceptron, x[n], t[n]);
*/
/*
  Calculate approximate derivatives by perturbing each weight in
  turn. Use formula (4.47) from Bishop. Note that Numerical Recipes
  function frprmn() requires that the derivatives be placed in
  dw[1..wlen].
*/
  for (i = 0; i < wlen; i++)
    dw[i+1] = 0.0;
  for (n = 0; n < nsamples; n++)
    for (i = 0; i < wlen; i++)
      {
	wx = perceptron->w[i];
	/*epsilon = wx*factor;*/
	epsilon = factor;
	depsilon = epsilon+epsilon;
	w = wx+epsilon;
	perceptron->w[i] = w;
	error_plus = mlp_check_criterion(perceptron, x[n], t[n]);
	w -= depsilon;
	perceptron->w[i] = w;
	error_minus = mlp_check_criterion(perceptron, x[n], t[n]);
	dw[i+1] += (error_plus-error_minus)/depsilon;
	perceptron->w[i] = wx;
      }
}

/*
  Return the number of MLPs in 'fptr'.
*/
static int get_nmlp(FILE *fptr, char *line, int llen)
{
  int nmlp = 0;

  while (fgets(line, llen+2, fptr))
    if (strncmp(line, MLP_SIGNATURE_STRING, strlen(MLP_SIGNATURE_STRING)) == 0)
      nmlp++;
  rewind(fptr);
  return nmlp;
}

/*
  Convert mnemonic optimization algorithm name 'name' into numeric
  code.
*/
static int convert_opt_method(char *name)
{
  int code;

  code = MLP_OPT_CGPLUS;
  if (name)
    {
      if (strstr(name, MLP_STR_CGPLUS))
	code = MLP_OPT_CGPLUS;
      else if (strstr(name, MLP_STR_GRADIENT_DESCENT))
	code = MLP_OPT_GRADIENT_DESCENT;
      else if (strstr(name, MLP_STR_WNLIB))
	code = MLP_OPT_WNLIB;
    }
  return code;
}

/*
  Load one or more multi-layer perceptrons described in 'fname' into
  (struct mlp **) array. The last element in the array is NULL.  In
  case of success, returns 0. In case of failure, returns -1 and sets
  errno.
*/
struct mlp **mlp_load(char *fname)
{
  int  i;
  int  j;
  int  llen;
  int  nl;
  int  status;
  int  idx;
  int  imlp;
  int  nmlp;
  int  index;
  float model_weight;
  char *xtr;
  char *str;
  char **str_weights;
  FILE *fptr;
  struct mlp *perceptron = (struct mlp *) 0;
  struct mlp **perceptrons = (struct mlp **) 0;

  status = file_info(fname, &llen, (int *) 0, '\0');
  if (status != -1)
    {
      fptr = fopen(fname, "r");
      if (fptr != 0)
	{
	  xtr = malloc(llen+2);
	  str = malloc(llen+2);
	  nmlp = get_nmlp(fptr, str, llen);
	  if (nmlp > 0)
	    {
	      status = 0;
	      perceptrons = calloc(nmlp+1, sizeof(struct mlp *));
	      if (!perceptrons)
		status = -1;
	      for (imlp = 0; (imlp < nmlp) && (status == 0); imlp++)
		{
		  perceptron = calloc(1, sizeof(struct mlp));
		  fscanf(fptr, "%40c%d/%f\n", xtr, &index, &model_weight);
		  fscanf(fptr, "%40c%d\n", xtr, &perceptron->p_iter);
		  fscanf(fptr, "%40c%s\n", xtr, str);
		  perceptron->method = convert_opt_method(str);
		  fscanf(fptr, "%40c%u\n", xtr, &perceptron->seed);
		  fscanf(fptr, "%40c%f\n", xtr, &perceptron->range);
		  fscanf(fptr, "%40c%d\n", xtr, &perceptron->mce);
		  fscanf(fptr, "%40c%f\n", xtr, &perceptron->error);
		  fscanf(fptr, "%40c%d\n", xtr, &perceptron->nlayers);
		  nl = perceptron->nlayers;
		  perceptron->npl = malloc(nl*sizeof(int));
		  perceptron->nodes = 0;
		  for (i = 0; i < nl; i++)
		    {
		      fscanf(fptr, "%40c%d\n", xtr, &perceptron->npl[i]);
		      perceptron->nodes += perceptron->npl[i];
		    }
		  perceptron->noff = perceptron->nodes-perceptron->npl[nl-1];
		  perceptron->a = malloc(perceptron->nodes*sizeof(float));
		  perceptron->z = malloc(perceptron->nodes*sizeof(float));
		  perceptron->delta = malloc(perceptron->nodes*sizeof(float));
		  fscanf(fptr, "%40c%d\n", xtr, &perceptron->d);
		  fscanf(fptr, "%40c%d\n", xtr, &perceptron->wlen);
		  perceptron->w = malloc(perceptron->wlen*sizeof(float));
		  idx = 0;
		  for (i = 0; i < perceptron->nodes; i++)
		    {
		      fgets(str, llen+2, fptr);
		      str_weights = str_tokenize(str+PCP_HFT, WHITESPACE);
		      j = 0;
		      if (str_weights)
			{
			  while (str_weights[j])
			    {
			      perceptron->w[idx] = atof(str_weights[j]);
			      idx++;
			      j++;
			    }
			  str_free(str_weights);
			}
		    }
		  perceptrons[imlp]= perceptron;
		}
	    }
	  else
	    {
	      status = -1;
	      errno = PERR_UNRECOGNIZED_MLP;
	    }
	  vx_free(xtr);
	  vx_free(str);
	  if (!status)
	    status = fclose(fptr);
	  else
	    fptr_close(fptr);
	  if (status)
	    perceptrons = (struct mlp **) 0;
	}
    }
  return perceptrons;
}

/*
  The function calculates the value of error function for the
  perceptron described by 'perceptron' at weight vector 'w'.
*/
static double mlp_function(double *w, void *parameters)
{
  int    i;
  int    status;
  int    cix;
  int    n;
  int    c;
  int    nlayers;
  int    iter;
  int    mce;
  int    class;
  int    mlp_class;
  int    nclass;
  double dev;
  int    *npl;
  float  *z;
  float  **t;
  struct mlp_parameters *mlp_params;
  struct mlp *perceptron;
  struct dataset *dset;

  mlp_params = (struct mlp_parameters *) parameters;
  mlp_params->errc = 0;
  perceptron = mlp_params->perceptron;
  dset = mlp_params->dset;
  t = mlp_params->t;
  /*
    Copy weight vector 'w' to 'perceptron'. perceptron->w is then used
    in calculations.
  */
  for (i = 0; i < perceptron->wlen; i++)
    perceptron->w[i] = w[i];
  nlayers = perceptron->nlayers;
  npl = perceptron->npl;
  c = perceptron->npl[nlayers-1];
  z = perceptron->z;
  cix = ivec_sum(npl, nlayers-1); /* cix is index of first output node */
  mce = 0;
  class = 0; /* actual class of sample 'n' */
  nclass = dset->nd[0]; /* number of samples in categories 0..class */
  dev = 0.0;
  for (n = 0; n < dset->nv; n++)
    {
/*
  Calculate network outputs for sample 'n'.
*/
      calculate_network(perceptron, dset->x[n]);
/*
  Determine predicted sample class (i.e., output node with maximum
  output).
*/
      mlp_class = fvec_argmax(&z[cix], c);
/*
  Update cumulative error, defined as sum of squares of
  differences between output and target errors across all
  outputs and for all vectors.

  TBD: using double precision 'dev' variable to calculate the total
  deviation has a dramatic positive effect on the learning
  performance. Strange, perhaps to be further investigated.
*/
      dev += output_error(&z[cix], t[n], c);
/*
  Update misclassification count.
*/
      if (class != mlp_class)
	mce++;
      if ((n == nclass-1))
	{
	  class++;
	  if (class < c)
	    nclass += dset->nd[class];
	}
    }
  perceptron->error = dev/dset->nv;
  perceptron->mce = mce;
  iter = mlp_params->iteration;
  if ((iter%10 == 0) || (perceptron->itmax == iter))
    {
      if (mlp_params->outdev)
	fprintf(mlp_params->outdev, 
		"Epoch: %8d; avg. error per output node: %10.7f; error rate: %6.2f   \n", 
		perceptron->p_iter+iter, perceptron->error/c, 100.0*perceptron->mce/dset->nv);
      /*dev = dev/2.0;*/ /* The Great Bug - fixed 09/02/2004 */
      status = mlp_save(perceptron, MLP_MODE_WRITE, 0, 0.0);
      if (status == -1)
	mlp_params->errc = errno;
    }
  perceptron->iterations = iter;
  dev = 0.5*dev; /* The Great Bug fix */
  return dev;
}
 
/*
  Converter between the mlp_func typedef and mlp_function().
*/
static double mlp_dfunction(double *w, int n, int iter, void *parameters, int *errc)
{
  struct mlp_parameters *mlp_params;

  mlp_params = (struct mlp_parameters *) parameters;
  mlp_params->iteration = iter;
  return mlp_function(w, parameters);
}

/*
  TBD: horrible kludge. Merge it with mlp_function() if it starts
  working. ljb, 07/24/2004
*/
static float mlp_simplex_function(float *w, int n, int iteration, void *parameters, int *errc)
{
  int    i;
  double *wd;
  struct mlp_parameters *mlp_params;

  mlp_params = (struct mlp_parameters *) parameters;
  mlp_params->iteration = iteration;
  wd = malloc(n*sizeof(double));
  for (i = 0; i < n; i++)
    wd[i] = w[i];
  return mlp_function(wd, parameters);
  free(wd);
}

static void set_parameters(struct mlp_parameters *parameters, struct dataset *dset,
			   float **t, struct mlp *mlp_perceptron, FILE *outdev, 
			   FILE *fdbg)
{
  parameters->iteration = mlp_perceptron->iterations;
  parameters->dset = dset;
  parameters->t = t;
  parameters->perceptron = mlp_perceptron;
  parameters->outdev = outdev;
  parameters->fdbg = fdbg;
}

/*
  Optimize weights of an MLP neural network using modified
  Polak-Ribiere conjugate gradient method. 't' are 'dset->nv' target
  output vectors.  The function calculates the set of weights which
  optimize the square error and stores them in mlp->w, and saves them
  in mlp->fname. It periodically reports results on 'outdev'.

  In case of success, return 0, otherwise return -1 and set error code
  in 'errc'.
*/
int mlp_optimize(struct dataset *dset, float **t, struct mlp *perceptron,
		 FILE *outdev, int *errc, FILE *fdbg)
{
  int    iflag;
  int    iter;
  int    status;
  int    irest;
  int    method;
  double eps;
  double *vect;
  struct mlp_parameters *parameters;

  status = 0;
  parameters = calloc(1, sizeof(struct mlp_parameters));
  set_parameters(parameters, dset, t, perceptron, outdev, fdbg);
  vect = double_clone(perceptron->w, perceptron->wlen);
  eps = 1.0e-5;
  method = 2; /* 1: FR; 2: PR; 3: PR+ */
  irest = 1;
  cgfam(perceptron->wlen, vect, mlp_dfunction, mlp_dgradient, irest, eps, method, 
	(void *) parameters, perceptron->itmax, &iter, &iflag, errc);
  if (errc)
    {
      if (iflag == -3)
	*errc = EINVAL;
      else if (iflag == -2)
	*errc = LERR_DESCENT;
      else if (iflag == -1)
	*errc = LERR_LNSEARCH;
      else
	*errc = 0;
    }
  if (errc && (*errc != 0))
    status = -1;
  free(vect);
  free(parameters);
  return status;
}

/*
  Memory allocator for mlp_optimize_gradient_descent(). Return -1 in
  case of failure.
*/
static int mlp_gradient_descent_alloc(struct mlp_parameters **parameters, int length,
				      double **gw, double **pw, double **deltaw)
{
  int status;

  status = -1;
  *parameters = calloc(1, sizeof(struct mlp_parameters));
  if (*parameters)
    {
      *gw = malloc(length*sizeof(double));
      if (*gw)
	{
	  *pw = calloc(length, sizeof(double));
	  if (*pw)
	    {
	      *deltaw = calloc(length, sizeof(double));
	      if (*deltaw)
		status = 0;
	    }
	}
    }
  return status;
}

/*
  Optimize weights of an MLP neural network using gradient descent
  algorithm defined by step size 'eta' and using momentum term defined
  by 'mu'. 't' are 'dset->nv' target output vectors.  The function
  calculates the set of weights which optimize the square error and
  stores them in mlp->w, and saves them in mlp->fname. It periodically
  reports results on 'outdev'.

  In case of success, return 0, otherwise return -1 and set error code
  in 'errc'.
*/
int mlp_optimize_gradient_descent(struct dataset *dset, float **t, struct mlp *perceptron,
				  float eta, float mu, FILE *outdev, int *errc, FILE *fdbg)
{
  int    i;
  int    c;
  int    nlayers;
  int    status;
  int    iter;
  double *gw;
  double *pw;
  double *deltaw;
  struct mlp_parameters *parameters;

  status = 0;
  parameters = (struct mlp_parameters *) 0;
  gw = (double *) 0;
  pw = (double *) 0;
  deltaw = (double *) 0;
  if (dset && perceptron)
    {
      nlayers = perceptron->nlayers;
      c = perceptron->npl[nlayers-1];
      status = mlp_gradient_descent_alloc(&parameters, perceptron->wlen, &gw, &pw, &deltaw);
      if (!status)
	{
	  set_parameters(parameters, dset, t, perceptron, outdev, fdbg);
	  dvec_fcopy(pw, perceptron->w, perceptron->wlen);
	  iter = 0;
	  while (iter <= perceptron->itmax)
	    {
	      mlp_gradient(gw, pw, parameters);
	      if (mu != 0.0)
		for (i = 0; i < perceptron->wlen; i++)
		  perceptron->w[i] += mu*deltaw[i];
	      for (i = 0; i < perceptron->wlen; i++)
		perceptron->w[i] -= eta*gw[i];
	      if (mu != 0.0)
		for (i = 0; i < perceptron->wlen; i++)
		  deltaw[i] = perceptron->w[i]-pw[i];
	      dvec_fcopy(pw, perceptron->w, perceptron->wlen);
	      mlp_function(pw, parameters);
	      iter++;
	      parameters->iteration = iter;
	    }
	  status = mlp_save(perceptron, MLP_MODE_WRITE, 0, 0.0);
	}
      else if (errc)
	*errc = errno;
    }
  else if (errc)
    *errc = errno;
  free(parameters);
  free(gw);
  free(pw);
  free(deltaw);
  return status;
}

static int mlp_simplex_alloc(float **fval, float ***smx, int n)
{
  int   i;
  int   status;
  float **mx;

  status = 0;
  mx = malloc((n+1)*sizeof(float *));
  if (mx)
    {
      for (i = 0; (i < n+1) && !status; i++)
	{
	  mx[i] = malloc(n*sizeof(float));
	  if (!mx[i])
	    status = -1;
	}
      if (!status)
	{
	  *fval = malloc((n+1)*sizeof(float));
	  if (!*fval)
	    status = -1;
	  else
	    *smx = mx;
	}
    }
  else
    status = -1;
  return status;
}

/*
  Optimize weights of an MLP neural network using Simplex algorithm of
  Nelder and Mead.  't' are 'dset->nv' target output vectors.  The
  function calculates the set of weights which optimize the square
  error and stores them in mlp->w, and saves them in mlp->fname. It
  periodically reports results on 'outdev'.

  In case of success, return 0, otherwise return -1 and set error code
  in 'errc'.
*/
static int mlp_optimize_simplex(struct dataset *dset, float **t, struct mlp *perceptron,
				FILE *outdev, int *errc, FILE *fdbg)
{
  int    i;
  int    j;
  int    iter;
  int    status;
  int    ndim;
  float  ftol;
  float  lambda;
  float  *fval;
  float  **smx;
  struct mlp_parameters *parameters;

  status = 0;
  ndim = perceptron->wlen;
  ftol = 1e-6;
  status = 0;
  parameters = calloc(1, sizeof(struct mlp_parameters));
  set_parameters(parameters, dset, t, perceptron, outdev, fdbg);
  status = mlp_simplex_alloc(&fval, &smx, ndim);
  if (!status)
    {
      for (i = 0; i < ndim; i++)
	smx[0][i] = (2*perceptron->range*rand()/(RAND_MAX+1.0))-perceptron->range;
      fval[0] = mlp_simplex_function(smx[0], ndim, 0, parameters, errc);
      if (*errc == 0)
	{
	  /*
	    Initialize the remaining ndim vertices of the
	    simplex, using equation (10.4.1) from Numerical
	    Recipes in C.
	  */
	  lambda = 1.0; /* TBD: hardcoded for now. */
	  for (i = 1; (i < ndim+1) && (*errc == 0); i++)
	    {
	      for (j = 0; j < ndim; j++)
		smx[i][j] = smx[0][j];
	      smx[i][i-1] += lambda;
	      fval[i] = mlp_simplex_function(smx[i], ndim, 0, parameters, errc);
	    }
	  simplex(smx, ndim, fval, mlp_simplex_function, (void *) parameters, 
		  perceptron->itmax, ftol, &iter, errc);
	  if (errc && *errc)
	    status = -1;
	}
      else
	status = -1;
    }
  free(parameters);
  return status;
}

/*
  Optimize weights of an MLP neural network using up to 'itmax'
  iterations of 'opt_method' optimization method.  'x' are 'nsamples'
  input vectors of length 'd' with 'nd[i]' samples per class 'i'. 't'
  are 'nsamples' target output vectors. 'eta' and 'mu' are the step
  size and momentum parameters for gradient descent optimization
  (therefore used only if 'opt_method' is
  MLP_OPT_GRADIENT_DESCENT). 'mlp_continue' is 0 to begin learning, 1
  to continue starting with network in 'fname'. The function
  calculates the set of weights which optimize the square error and
  stores them in 'fname'. It periodically reports results on 'outdev'.

  The possible values for opt_method are defined in mlp.h.

  In case of success, the function returns 'mlp' structure which
  contains the trained neural network and sets 'errc' to 0. In case of
  error, return NULL and set 'errc', except for LERR_LNSEARCH (line
  search failure in optimization routine) and LERR_DESCENT
  (optimization procedure unable to find descent direction) errors. In
  those cases, the function returns the trained network, but sets
  'errc' to LERR_LNSEARCH or LERR_DESCENT. This is done because
  optimization function often reports these errors, and yet finds a
  decent minimum. The caller may choose to ignore these two error
  codes.
*/
struct mlp *mlp_learn(int opt_method, float **x, int nsamples, int *nd, int d, 
		      float **t, int nlayers, int *npl, int itmax, float range, 
		      float eta, float mu, FILE *outdev, int mlp_continue, 
		      char *fname, unsigned int seed, int *errc, FILE *fdbg)
{
  int    status;
  int    i;
  int    wlen;
  int    nodes;
  int    c;
  float  *w;
  float  *dw;
  struct dataset *dset;
  struct mlp *perceptron = (struct mlp *) 0;
  struct mlp_parameters *parameters;
  struct mlp **perceptrons;

  c = 0;
  status = 0;
  if (errc)
    *errc = 0;
  /*
    If begin learning, initialize 'perceptron' pseudo-randomly,
    otherwise read it from 'fname'.
  */
  if (mlp_continue == 1)
    {
      perceptrons = mlp_load(fname);
      if (perceptrons)
	{
	  perceptron = perceptrons[0];
	  perceptron->fname = strdup(fname);
	  perceptron->itmax = itmax;
	  wlen = perceptron->wlen;
	  w = malloc((wlen+1)*sizeof(float));
	  for (i = 0; i < wlen; i++)
	    w[i] = perceptron->w[i];
	}
    }
  else
    {
      c = npl[nlayers-1];
      perceptron = calloc(1, sizeof(struct mlp));
      perceptron->itmax = itmax;
      perceptron->method = opt_method;
      if (fname && *fname)
	perceptron->fname = strdup(fname);
      wlen = (d+1)*npl[0];
      for (i = 0; i < nlayers-1; i++)
	wlen += (npl[i]+1)*npl[i+1];
      /*
	Allocate space for weights and derivatives vectors. Use 'wlen+1'
	because 'w' and 'dw' are 1-based vectors, as required by frprmn()
	API.
      */
      w = malloc((wlen+1)*sizeof(float));
      dw = malloc((wlen+1)*sizeof(float));
      perceptron->d = d;
      perceptron->nlayers = nlayers;
      nodes = 0;
      for (i = 0; i < nlayers; i++)
	nodes += npl[i];
      perceptron->nodes = nodes;
      perceptron->npl = malloc(nlayers*sizeof(int));
      for (i = 0; i < nlayers; i++)
	perceptron->npl[i] = npl[i];
      perceptron->a = malloc(nodes*sizeof(float));
      perceptron->z = malloc(nodes*sizeof(float));
      perceptron->delta = malloc(nodes*sizeof(float));
      perceptron->range = range;
      perceptron->seed = seed;
      perceptron->wlen = wlen;
      perceptron->w = malloc(wlen*sizeof(float));
      for (i = 0; i < wlen; i++)
	w[i] = (2*perceptron->range*rand()/(RAND_MAX+1.0))-perceptron->range;
      for (i = 0; i < wlen; i++)
	perceptron->w[i] = w[i];
      if (fdbg)
	for (i = 0; i < wlen; i++)
	  fprintf(fdbg, "mlp_learn(); perceptron->w[%d]: %12.5g\n", i, perceptron->w[i]);
    }
  if (outdev)
    fprintf(outdev, "\n");
  parameters = calloc(1, sizeof(struct mlp_parameters));
  dset = dataset_lt(d, c, nd, nsamples, (char **) 0, x);
  set_parameters(parameters, dset, t, perceptron, outdev, fdbg);
  if (*errc != 0)
    {
      perceptron = mlp_free(perceptron);
      status = -1;
    }
  else if (itmax > 0) 
    {
      perceptron->method = opt_method;
      if (opt_method == MLP_OPT_SIMPLEX)
	status = mlp_optimize_simplex(dset, t, perceptron, outdev, errc, fdbg);
      else if (opt_method == MLP_OPT_CGPLUS)
	status = mlp_optimize(dset, t, perceptron, outdev, errc, fdbg);
      else if (opt_method == MLP_OPT_GRADIENT_DESCENT)
	status = mlp_optimize_gradient_descent(dset, t, perceptron, eta, mu, outdev,
					       errc, fdbg);
      if (status == -1)
	{
	  if (errc && (*errc != LERR_LNSEARCH) && (*errc != LERR_DESCENT))
	    perceptron = mlp_free(perceptron);
	}
    }
  return perceptron;
}

/*
  Accept input parameters from the user and pass them to MLP learning
  function mlp_learn(). 
  
  In case of error, set 'errc'. If error is file access error, set
  'xname'.
*/
void p_mlp_learn(int *errc, char **xname, int *dbg)
{
  int   status;
  int   opt_method;
  int   mlp_continue;
  int   mlp_nlayers;  /* mlp_nlayers */
  int   mlp_itmax;  /* mlp_itmax */
  float mlp_range;  /* mlp_range */
  float eta;
  float mu;
  int   *mlp_npl; /* mlp_npl */
  char  *mlp_fname; /* mlp_fname */
  float **t;
  unsigned int mlp_seed;
  struct mlp *perceptron;
  FILE  *outdev;
  FILE  *fdbg = (FILE *) 0;

  *errc = 0;
  if (tds)
    {
      outdev = stdout;
      fflush(outdev);
      if (*dbg > 0)
	fdbg = fopen(PCP_DBG, "a");
      t = mlp_target(tds->c, tds->nd);
      mlp_continue = 0;
      status = input_mlp(outdev, tds->d, tds->c, tds->nv, &mlp_continue, &mlp_nlayers,
			 &mlp_npl, &mlp_itmax, &mlp_range, &opt_method, &eta, &mu,
			 (int *) 0, 0, (int *) 0, &mlp_seed, &mlp_fname);
      if (status == 0)
	{
	  inverse_video();
	  srand(mlp_seed);
	  perceptron = mlp_learn(opt_method, tds->x, tds->nv, tds->nd, tds->d, t,
				 mlp_nlayers, mlp_npl, mlp_itmax, mlp_range, eta,
				 mu, outdev, mlp_continue, mlp_fname, mlp_seed, 
				 errc, fdbg);
	  reset_video();
	  if (!perceptron)
	    status = -1;
	  else if (mlp_itmax != -1)
	    {
	      printf("\n");
	      print_line(stdout, "Done.", PCP_QLEN);
	      pwait();
	    }
	}
      if (status == -1)
	{
	  *errc = errno;
	  *xname = strdup(mlp_fname);
	}
    }
  else
    *errc = PERR_UNDEFINED_TDS;
}

/*
  Classify dataset 'dset' using MLP defined in 'fname', for dataset
  'dset'. The MLP outputs are placed in the returned struct mlp, and
  the classification results are placed in dset->cx. The function
  assigns each vector to the class corresponding to the node with the
  largest output, unless 'thd' is > 0. In that case, the assignment is
  performed only if the largest node output exceeds 'thd'.

  In case of error, return NULL and place error code in 'errc'.
*/
static struct mlp *recall(struct dataset *dset, float thd, char *fname, int *errc)
{
  struct mlp **perceptrons;
  struct mlp *perceptron;

  *errc = 0;
  perceptron = (struct mlp *) 0;
  /*
    Read MLP from 'fname' into mlp structure.
  */
  perceptrons = mlp_load(fname);
  if (perceptrons)
    perceptron = mlp_predict(dset, perceptrons, thd, errc);
  else
    *errc = errno;
  return perceptron;
}

/*
  Classify dataset 'dset' using MLP defined in 'perceptrons'. The MLP
  outputs are placed in the returned struct mlp, and the
  classification results are placed in dset->cx. The function assigns
  each vector to the class corresponding to the node with the largest
  output, unless 'thd' is > 0. In that case, the assignment is
  performed only if the largest node output exceeds 'thd'.

  'perceptrons' is an array of 'struct mlp' objects. It can be
  created, for example, from a file using calling mlp_load().

  In case of error, return NULL and place error code in 'errc'.
*/
struct mlp *mlp_predict(struct dataset *dset, struct mlp **perceptrons, float thd, int *errc)
{
  int   i;
  int   j;
  int   nl;
  int   noff;
  int   imax;
  int   icx;
  int   nmlp;
  int   imlp;
  int   nout;
  float cmax;
  float *mlp_output;
  struct mlp *perceptron;

  perceptron = (struct mlp *) 0;
  if (errc && perceptrons)
    {
      *errc = 0;
      nmlp = 0;
      while (perceptrons[nmlp])
	nmlp++;
      perceptron = perceptrons[0];
      nl = perceptron->nlayers-1;
      /*
	Check that the number of inputs equals number of features in
	'dset', and, in case the number of classes in dset is greater
	than 1, that it equals the number of outputs in the network.
	Otherwise the network and the dataset are incompatible.

	If the number of classes in 'dset' is 1, it means that the
	dataset is unlabeled, which is OK.
       */
      if (((dset->c == 1) || ((dset->c > 1) && (perceptron->npl[nl] == dset->c))) && 
	  (perceptron->d == dset->d))
	{
	  /*
	    For each MLP, calculate outputs and store results in dset->prediction.
	  */
	  dset->prediction = malloc(dset->nv*sizeof(int));
	  if (dset->prediction)
	    {
	      noff = perceptron->noff;
	      nl = perceptron->nlayers-1;
	      nout = perceptron->npl[nl];
	      mlp_output = malloc(nout*sizeof(float));
	      for (i = 0; i < dset->nv; i++)
		{
		  fvec_set(mlp_output, nout, 0.0);
		  for (imlp = 0; imlp < nmlp; imlp++)
		    {
		      calculate_network(perceptrons[imlp], dset->x[i]);
		      for (j = 0; j < nout; j++)
			mlp_output[j] += perceptron->z[noff+j];
		    }
		  /*
		    For nmlp > 1 (MLP bagging classifier), voting is
		    done by finding average outputs from all nmlp
		    classifiers, then assigning the class
		    corresponding to the largest output. This is
		    consistent with the rule recommended in: Amanda
		    J. C. Sharkey (Ed.), Combining Artificial Neural
		    Nets, Chapter 3, Section 3.2.1. Springer, London,
		    1999.
		  */
		  for (j = 0; j < nout; j++)
		    mlp_output[j] /= nmlp;
		  cmax = mlp_output[0];
		  imax = 0;
		  for (j = 1; j < nout; j++)
		    {
		      if (cmax < mlp_output[j])
			{
			  imax = j;
			  cmax = mlp_output[j];
			}
		    }
		  dset->prediction[i] = imax;
		  icx = dataset_class(i, teds->c, teds->nd);
		}
	      free(mlp_output);
	    }
	  else
	    *errc = errno;
	}
      else
	*errc = PERR_INCONSISTENT_MLP;
      if (*errc)
	{
	  for (i = 0; i < nmlp; i++)
	    mlp_free(perceptrons[i]);
	  free(perceptrons);
	  perceptron = (struct mlp *) 0;
	}
    }
  else if (errc)
    *errc = EINVAL;
  return perceptron;
}

void p_mlp_predict(int *errc, char **xname)
{
  int   verbose;
  int   min_range;
  int   max_range;
  int   i;
  float thd;
  char  *mlp_fname;
  char  *output_fname;
  FILE  *outdev;
  FILE  *fptr;
  struct mlp *perceptron;

  clear_screen();
  outdev = stdout;
  cursor_on();
  mlp_fname = input_filename(PCP_UMSG_FNAME_MLP, PCP_MLP, outdev);
  fptr = fopen(mlp_fname, "r");
  if (fptr)
    {
      fclose(fptr);
      output_fname = strdup(PCP_RCL);
      fptr = fopen(output_fname, "w");
      if (fptr)
	{
	  thd = 0.0;
	  thd = input_float(stdin, outdev, PCP_UMSG_DTHD, PCP_QLEN, 
			    &thd, (float *) 0, (float *) 0);
	  perceptron = recall(teds, thd, mlp_fname, errc);
	  if (perceptron)
	    {
	      min_range = 0;
	      max_range = 1;
	      verbose = input_integer(stdin, outdev, OUTPUT_MSG, PCP_QLEN, 
				      &min_range, &min_range, &max_range);

	      /*
		Store predictions in PCP_RCL.
	      */
	      for (i = 0; i < teds->nv; i++)
		write_rcl(fptr, i, teds, tds);
	      if (!fclose(fptr))
		{
		  /*
		    Display results.
		  */
		  predict_disp(teds, verbose, PALG_MLP);
		  pwait();
		}
	      else
		{
		  *errc = errno;
		  *xname = strdup(output_fname);
		}
	    }
	  else
	    *xname = strdup(mlp_fname);
	}
      else
	{
	  *errc = errno;
	  *xname = strdup(output_fname);
	}
    }
  else
    {
      *errc = errno;
      *xname = strdup(mlp_fname);
    }
}