ports//science/pcp/work/pcp-2.2/src/estimate

/*
  File name: estimate_error.c
  Created by: Ljubomir Buturovic
  Created: 07/23/2001
  Purpose: Bayes error estimation using Fukunaga's k-NN method.
*/

/*
  Copyright 2004 Ljubomir J. Buturovic

  Permission is hereby granted, free of charge, to any person
  obtaining a copy of this software and associated documentation files
  (the "Software"), to deal in the Software without restriction,
  including without limitation the rights to use, copy, modify, merge,
  publish, distribute, sublicense, and/or sell copies of the Software,
  and to permit persons to whom the Software is furnished to do so,
  subject to the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
*/

/*
  The principal functions in this file are l_estimate() and
  r_estimate(). They provide upper and lower bounds, respectively, of
  the Bayes error estimate, using the k-NN method described in
  Keinosuke Fukunaga, Introduction to Statistical Pattern Recognition,
  Second Edition, Chapter 7, Morgan Kaufmann, San Francisco, 1990.

  The method implemented in l_estimate() is fully leave-one-out. For
  the classification of each input vector, the covariance matrix of
  its' class is reestimated by removing the vector, and all nearest
  neighbors to the class are recomputed. As a result, the
  computational complexity of the method is high, as are its' memory
  requirements (roughly proportional to N^2, where N is the number of
  input vectors).

  The fully leave-one-out method implemented in l_estimate() is known
  as 'x'. The approximate method, achieved for 'approximate_l_mode' ==
  1 (see below), is known as 'l'. It should produce results identical
  to FORTRAN implementation of the same method, known as 'a'.
*/

/* TBD: seed unused? */

static char rcsid[] = "$Id: estimate_error.c,v 1.138 2005/05/06 14:40:03 ljubomir Exp $";

#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <unistd.h>
#include <float.h>
#include <errno.h>
#include "pcp.h"
#include "hash_util.h"
#include "lmat.h"
#include "lau.h"
#include "knn.h"
#include "pau.h"
#include "estimate_error.h"

static int r_mode = 0;

/*
  The code uses two variables, 'r_mode' and 'approximate_l_mode',
  which control two different development/testing modes for
  l_estimate() function. 'r_mode' == 1 turns on the R estimation
  compatibility (i.e., l_estimate() will return the same results as
  r_estimate(). 'approximate_l_mode' == 1 turns on compatibility mode
  with approximate L estimate (identical to PCP FORTRAN function
  ee1()). This mode uses all available samples to estimate covariance
  matrices (i.e., the covariance matrix estimates are not
  leave-one-out, but resubstitution estimates).

  The following matrix determines the method executed by l_estimate():

  approximate_l_mode      r_mode             method

           0                 0                 'x'
           0                 1                 'r'
           1                 0                 'l'
           1                 1                 'l'
*/

/*
  LLR comparison function.
*/
static int compare_likelihoods(const void *llr1, const void *llr2)
{
  int retval = -1;
  struct LLR *l1;
  struct LLR *l2;
  
  l1 = (struct LLR *) llr1;
  l2 = (struct LLR *) llr2;
  if (l1->llr > l2->llr)
    retval = 1;
  else if (l1->llr == l2->llr)
    retval = 0;
  return retval;
}

/*
  Memory allocation for r_estimate(), l_estimate().
*/
static void ee_init(int c, int kmin, int kmax, float ***fpn, float **estimate)
{
  int i;
  int k;
  float **xpn;
  float *ex;

  xpn = malloc(c*sizeof(float *));
  for (i = 0; i < c; i++)
    {
      xpn[i] = malloc((kmax-kmin+1)*sizeof(float));
      for (k = kmin; k <= kmax; k++)
	xpn[i][k-kmin] = 0.0;
    }
  *fpn = xpn;
  ex = malloc((kmax-kmin+1)*sizeof(float));
  for (k = kmin; k <= kmax; k++)
    ex[k-kmin] = 0.0;
  *estimate = ex;
}

/*
  Free memory allocated by ee_init().
*/
static void ee_free(int c, float **fpn, float *estimate)
{
  int i;

  for (i = 0; i < c; i++)
    free(fpn[i]);
  free(fpn);
  free(estimate);
}

/*
  Calculate all pairwise distances for 'nv' vectors in 'x' using
  Mahalanobis distance measure. 'sigma' is array of 'c' inverse
  covariance matrices in Symmetric Storage Mode, where 'c' is number
  of classes. 'd' is dimension of input vectors. 'nd[i]' are class
  cardinalities.
  
  Returns an array of 'nv' memory pointers. 'distances[i][j]' is
  Mahalanobis distance between X[i] and X[j].

  In case of malloc() error, return (float **) 0 and set errno.
*/
static float **calculate_distances(float **x, int nv, int d, int c, int *nd,
				   float **sigma, FILE *outdev)
{
  int i;
  int j;
  int category;
  int class_count;
  float *matrix;
  float *mxv;
  float *mxe;
  float **distances;
  
  distances = malloc(nv*sizeof(float *));
  if (distances != (float **) 0)
    {
      mxv = malloc(d*sizeof(float));
      if (mxv != (float *) 0)
	{
	  mxe = malloc(d*sizeof(float));
	  if (mxe != (float *) 0)
	    {
	      for (i = 0; i < nv; i++)
		{
		  category = 0;
		  class_count = nd[0];
		  matrix = sigma[category];
		  if (outdev != (FILE *) 0)
		    fprintf(outdev, "Calculating distances for vector %7d.\n", i+1);
		  distances[i] = malloc(nv*sizeof(float)); 
		  if (distances[i] != (float *) 0)
		    {
		      for (j = 0; j < nv; j++)
			{
			  if (j == class_count)
			    {
			      class_count += nd[++category];
			      matrix = sigma[category];
			    }
			  distances[i][j] = mahalanobis(x[i], x[j], d, matrix, mxv, mxe);
			}
		    }
		  else
		    distances = (float **) mx_free((void **) distances, i);
		}
	      vx_free(mxe);
	    }
	  vx_free(mxv);
	}
    }
  return distances;
}

/*
  Calculate distances from all 'nv' vectors in 'x' to median vectors
  of all classes, using Mahalanobis distance measure. sigma[i] is
  inverse covariance matrices of class 'i' in SSM. 'd' is dimension of
  input vectors, 'c' is number of categories with 'nd[i]' vectors in
  category i.
  
  Returns an array of 'c' memory pointers. 'distances[i]' points to
  array of 'nv' distances to median vector of class i.

  In case of error, return (float **) 0 and set errno.
*/
static float **calculate_mean_distances(float **x, int nv, int d, int c, int *nd,
					float **sigma)
{
  int   i;
  int   j;
  int   m;
  int   cntr;
  int   status = 0;
  float *mean;
  float *mxv;
  float *mxe;
  float **distances = (float **) 0;
  
  cntr = 0;
  mean = malloc(d*sizeof(float));
  if (mean != (float *) 0)
    {
      mxv = malloc(d*sizeof(float));
      if (mxv != (float *) 0)
	{
	  mxe = malloc(d*sizeof(float));
	  if (mxe != (float *) 0)
	    {
	      distances = malloc(c*sizeof(float *));
	      if (distances != (float **) 0)
		{
		  for (i = 0; (i < c) && (status == 0); i++)
		    {
		      /*
			Estimate mean vector for class i.
		      */
		      for (j = 0; j < d; j++)
			{
			  mean[j] = 0.0;
			  for (m = 0; m < nd[i]; m++)
			    mean[j] += x[cntr+m][j];
			  mean[j] = mean[j]/nd[i];
			}
		      /*
			Done - now calculate Mahalanobis distances from all vectors in
			'x' to 'mean'.
		      */
		      distances[i] = malloc(nv*sizeof(float));
		      if (distances[i] != (float *) 0)
			{
			  for (m = 0; m < nv; m++)
			    distances[i][m] = mahalanobis(x[m], mean, d, sigma[i], mxv, mxe);
			  cntr += nd[i];
			}
		      else
			{
			  distances = (float **) mx_free((void **) distances, i);
			  status = -1;
			}
		    }
		  vx_free(mxe);
		}
	      vx_free(mxv);
	    }
	}
      vx_free(mean);
    }
  return distances;
}

/*
  Calculate k-nearest neighbor distances for all 'nv' vectors in 'x',
  for all 'c' classes, with 'nd[i]' vectors per class.  Returns an
  array of pointers to class neighbors, one per class. Each pointer
  points to the following memory structure:
  
  
                            +--------------+
           for nv vectors:  |       1      |
            k-th neighbor   |       2      |  k = kmin
            from class c    |      ...     |
                            |       nv     |
                            +--------------+
           for nv vectors:  |       1      |
            k-th neighbor   |       2      |  k = kmin+1
            from class c    |      ...     |
                            |       nv     |
                            +--------------+
                            |              |
                            |      ...     |
                            |              |
                            +--------------+
           for nv vectors:  |       1      |
            k-th neighbor   |       2      |  k = kmax
            from class c    |      ...     |
                            |       nv     |
                            +--------------+

*/
static float **calculate_neighbors(float **distances, int nv, int kmin, int kmax,
				   int c, int *nd, FILE *outdev, FILE *fdbg)
{
  int i;
  int j;
  int k;
  int m;
  int idx;
  int cidx;
  int cmax;
  int cnv; /* total number of vectors in classes 0..i */
  float *dist;
  float *nx;
  float **neighbors;
  
  neighbors = malloc(c*sizeof(float *));
  if (neighbors != (float **) 0)
    {
      cmax = ivec_max(nd, c);
      /* +1 in order to use finsort() */
      dist = malloc((cmax+1)*sizeof(float));
      cnv = 0;
      if (dist != (float *) 0)
	{
	  for (i = 0; i < c; i++)
	    {
	      if (outdev != (FILE *) 0)
		fprintf(outdev, "Calculating neighbors from class %7d.\n", i+1);
	      cidx = 0;
	      nx = malloc((kmax-kmin+2)*nv*sizeof(float));
	      if (nx != (float *) 0)
		{
		  for (m = 0; m < nv; m++)
		    {
		      idx = cidx;
		      for (j = 0; j < nd[i]; j++)
			dist[j] = distances[m][cnv+j];
		      finsort(dist, nd[i]);
		      if (fdbg != (FILE *) 0)
			for (j = 0; j < nd[i]; j++)
			  fprintf(fdbg, "calculate_neighbors(); target class: %3d; vector: %5d; dist[%5d]: %12.5g\n",
				  i+1, m+1, j, dist[j]);
		      for (k = kmin-1; k <= kmax; k++)
			{
			  nx[idx] = dist[k];
			  idx += nv;
			}
		      cidx++;
		    }
		  if (fdbg != (FILE *) 0)
		    {
		      for (j = 0; j < nv*(kmax-kmin+1); j++)
			fprintf(fdbg, "calculate_neighbors(); target class: %3d; nx[%5d]: %12.5g\n",
				i+1, j, nx[j]);
		    }
		  neighbors[i] = nx;
		  cnv += nd[i];
		}
	      else
		{
		  neighbors = (float **) 0;
		  dist = vx_free(dist);
		  neighbors = (float **) mx_free((void **) neighbors, i);
		}
	    }
	  vx_free(dist);
	}
    }
  return neighbors;
}

/*
  Get cardinality of classes 0..clx-1, with 'nd[i]' vectors per
  class. TBD: replace with ivec_sum().
*/
static int get_cardinality(int clx, int *nd)
{
  int i;
  int cdx = 0;
  
  for (i = 0; i < clx; i++)
    cdx += nd[i];
  return cdx;
}

/*
  Determine distances from all 'd'-dimensional vectors in 'x' to
  nearest neighbors in 'category', excluding vector 'icx' from
  'category'. The k-NN distances are calculated for k between 'kmin'
  and 'kmax' and are obtained by adjusting 'distances', using formulae
  (1), (2) and (3) of 05/01/2002, and (1) of 04/30/2002, from R&D,
  vol. III. 'xmult' is the product (X-M)*sigma^(-1) from formula
  (2). 'dxk' is distance from vector 'icx' to the centroid of
  'category'. 'c' is number of classes, 'nd' has class cardinalities.
  
  The index 'icx' is 0-based, and the range is 0 to nv-1.
  
  Memory layout of the neighbors is as shown for one class in
  calculate_neighbors().

  In case of malloc() error, return (float *) 0.  
*/
static float *recalculate_class_neighbors(int kmin, int kmax, float **x, int d, int nv,
					  float **distances, float *xmult, float dkx,
					  int category, int c, int *nd, int icx,
					  int approximate_l_mode, FILE *fdbg)
{
  int   i;
  int   j;
  int   mx;
  int   nx;
  int   idx;
  int   jcntr;
  char  *err_msg;
  float *neighbors = (float *) 0;
  float *dist = (float *) 0;
  float *xdiff; /* xmult*(Xa-Xb) from formula (2) */
  float distance;
  float dot; 
  float fnx;
  float factor;
  
  dist = malloc((nd[category])*sizeof(float));
  if (dist != (float *) 0)
    {
      xdiff = malloc(d*sizeof(float));
      if (xdiff != (float *) 0)
	{
	  mx = 0;
	  mx = get_cardinality(category, nd);
	  nx = nd[category];
	  fnx = (nx-2.0)/(nx-1.0);
	  factor = nx;
	  /* 'factor' is f from formula (1), R&D, vol. III, 04/30/2002 */
	  factor = factor/((factor-1.0)*(factor-1.0)-factor*dkx*dkx); 
	  neighbors = malloc((kmax-kmin+1)*nv*sizeof(float));
	  if (neighbors != (float *) 0)
	    {
	      for (i = 0; i < nv; i++)
		{
		  jcntr = 0;
		  /*
		    Calculate distances between vector 'i' and all vectors in
		    'category' except 'icx'.
		  */
		  for (j = 0; j < nd[category]; j++)
		    {
		      if (j != icx-mx)
			{
			  distance = distances[i][mx+j]; /* original distance between i and j */
			  if ((r_mode == 1) || (approximate_l_mode == 1))
			    dist[jcntr] = distance;
			  else
			    {
			      distance = distance*distance;
			      fvec_diff(x[i], x[j+mx], xdiff, d, (int *) 0);
			      /*
				'dot' is 'x' in formula (2) of
				05/01/2002, R&D, vol. III.
			      */
			      dot = fvec_dot(xdiff, xmult, d, (int *) 0); 
			      dist[jcntr] = fnx*(distance+factor*dot*dot);
			      dist[jcntr] = sqrt(dist[jcntr]);
			    }
			  jcntr++;
			}
		    }
		  finsort(dist, nd[category]-1);
		  idx = i;
		  for (j = kmin-1; j < kmax; j++)
		    {
		      neighbors[idx] = dist[j];
		      idx += nv;
		    }
		}
	    }
	  vx_free(xdiff);
	}
      vx_free(dist);
    }
  if ((neighbors == (float *) 0) && (fdbg != (FILE *) 0))
    {
      err_msg = strerror(errno);
      fprintf(fdbg, "recalculate_class_neighbors(); malloc() failure; %s\n", err_msg);
    }
  return neighbors;
}

/*
  Compute determinant of leave-one-out covariance matrix estimate, by
  excluding vector 'icx', for matrix characterized by 'mdet', 'edet'
  (determinant = mdet*2^edet). 'd' is number of features, 'nc' number
  of vectors in the class whose determinant is being reestimated, 'dm'
  distance from 'icx' to mean vector of its' class. Returns log2 of
  the updated determinant, so the actual determinant is
  2**return_value (mantisa is set to 1.0). Based on formula (7.67)
  from Fukunaga.
*/
static float recalculate_determinant(float mdet, float edet, int d, int nc, 
				     int icx, float dm, int approximate_l_mode)
{
  float efactor;
  float fx;
  float det;
  
  efactor = log(2.0);
  fx = 1.0-nc*dm*dm/((nc-1.0)*(nc-1.0));
  det = log(mdet)+edet*efactor+d*log((nc-1.0)/(nc-2.0))+log(fx);
  det = det/efactor;
  if ((r_mode == 1) || (approximate_l_mode == 1))
    det = edet+log(mdet)/efactor;
  return det;
}

static float calculate_llr(int d, int nv1, int nv2, float d1, float d2,
			   float mdet1, float edet1, float mdet2, float edet2)
{
  float llr;
  float constant_term;
  
  constant_term = nv1;
  constant_term = log(constant_term/nv2);
  constant_term += 0.5*(log(mdet1/mdet2)+(edet1-edet2)*log(2.0));
  if ((d1 != 0) && (d2 != 0))
    llr = d*log(d1/d2)+constant_term;
  else if (d1 != 0)
    llr = FLT_MAX;
  else
    llr = -FLT_MAX;
  return llr;
}

/*
  Determine more likely class between 'c1' and 'c2' of cardinalities
  'nv1', 'nv2', given decision threshold 't', distances 'd1' and 'd2'
  and determinants 'mdet1', 'edet1', 'mdet2', 'edet2', for a
  'd'-dimensional vector characterized by distances 'd1' and
  'd2'. 'd1' is distance between the vector being classified and k-th
  nearest neighbor from class 1. 'd2' is distance beween the vector
  and k-th nearest neighbor from class 2.  Based on formula (7.5) from
  Fukunaga. Returns 'c1' or 'c2'.
*/
static int assign_class(int d, int c1, int c2, int nv1, int nv2, float d1, float d2,
			float mdet1, float edet1, float mdet2, float edet2, float t)
{
  int   aclass;
  float llr;
  
  llr = calculate_llr(d, nv1, nv2, d1, d2, mdet1, edet1, mdet2, edet2);
  aclass = c1;
  if (llr > t)
    aclass = c2;
  return aclass;
}

/*
  Return 0 if 'index' and 'lix' index the same vector, otherwise
  return 1. 'index' range is from 0..nc1+nc2-1, 'lix' from 0..nc1-1 if
  'category' equals CLASS_C1, 0..nc2-1 if 'category' equals CLASS_C2.
*/
static int cmp_index(int index, int lix, int category, int nc1, int nc2)
{
  int check = 1;

  if (r_mode != 1)
    if (lix != IDX_NONE)
      {
	if ((category == CLASS_C1) && (index < nc1))
	  {
	    if (index == lix)
	      check = 0;
	  }
	else if ((category == CLASS_C2) && (index >= nc1))
	  {
	    if (index-nc1 == lix)
	      check = 0;
	  }
      }
  return check;
}

/*
  Find the optimal threshold for deciding between classes c1 and c2,
  characterized by covariance matrix determinants 'mdet1', 'edet1' and
  'mdet2', 'edet2'.  'c1_c1_neighbors', 'c2_c1_neighbors' and
  'c1_c2_neighbors', 'c2_c2_neighbors' are distances to k-th nearest
  neighbor from class c1 and c2, respectively, for all nc1+nc2
  'd'-dimensional vectors. The length of 'c1_*' arrays is 'nc1' and
  the length of 'c2_*' arrays is 'nc2'. The optimal threshold is
  defined as the log-likelihood value which gives least classification
  error between the two classes.

  Leave-one-out functionality added 09/03/2001: if 'lix' != IDX_NONE,
  omit vector 'lix' from 'category' from optimization.  'lix' range is
  0..nc1-1 for 'category' == CLASS_C1, 0..nc2-1 for 'category' ==
  CLASS_C2.  

  In case of success, errno is set to 0; otherwise, it is set to any
  of the values set by malloc().
*/
static float find_optimal_threshold(int d, int nc1, int nc2, float mdet1,
				    float edet1, float mdet2, float edet2,
				    float *c1_c1_neighbors, float *c2_c1_neighbors,
				    float *c1_c2_neighbors, float *c2_c2_neighbors, 
				    int lix, int category, FILE *fdbg)
{
  int    i;
  int    j;
  int    mc;
  int    cmc;
  int    done;
  int    nix1;
  int    nix2;
  int    icntr;
  float  constant_term;
  float  threshold = 0.0;
  float  ct;
  float  nxct;
  float  d1;
  float  d2;
  float  det;
  float  rcc;
  char   *err_msg;
  struct LLR *llr;
  
  errno = 0;
  constant_term = nc1;
  constant_term = log(constant_term/nc2);
  det = 0.5*(log(mdet1/mdet2)+(edet1-edet2)*log(2.0));
  constant_term += det;
  nix1 = nc1;
  nix2 = nc2;
  rcc = 0.0;
  if (r_mode != 1)
    {
      if ((lix != IDX_NONE) && (category == CLASS_C1))
	{
	  nix1--;
	  rcc = nc1;
	  rcc = log(nix1/rcc);
	}
      if ((lix != IDX_NONE) && (category == CLASS_C2))
	{
	  nix2--;
	  rcc = nc2;
	  rcc = log(rcc/nix2);
	}
    }
  constant_term += rcc;
  icntr = 0;
  llr = malloc((nix1+nix2)*sizeof(struct LLR));
  if (llr != (struct LLR *) 0)
    {
      for (i = 0; i < nc1+nc2; i++)
	{
	  if (cmp_index(i, lix, category, nc1, nc2) != 0)
	    {
	      if (i < nc1)
		{
		  d1 = c1_c1_neighbors[i];
		  d2 = c1_c2_neighbors[i];
		  llr[icntr].category = CLASS_C1;
		}
	      else
		{
		  d1 = c2_c1_neighbors[i-nc1];
		  d2 = c2_c2_neighbors[i-nc1];
		  llr[icntr].category = CLASS_C2;
		}
	      if (d2 != 0.0)
		{
		  if (d1 != 0.0)
		    llr[icntr].llr = d*log(d1/d2)+constant_term;
		  else
		    llr[icntr].llr = -FLT_MAX;
		}
	      else
		{
		  if (d1 == 0.0)
		    llr[icntr].llr = constant_term;
		  else
		    llr[icntr].llr = FLT_MAX;
		}
#if 0
	      if ((fdbg != (FILE *) 0) && (lix == 8) && (category == CLASS_C2))
		fprintf(fdbg, "find_optimal_threshold(); lix: %5d; i: %5d; "
			"d1: %12.5g; d2: %12.5g; det: %12.5g; constant_term: %12.5g\n",
			lix+1, i, d1, d2, det, constant_term);
#endif
	      icntr++;
	    }
	}
      qsort(llr, nix1+nix2, sizeof(struct LLR), compare_likelihoods); 
      /*
	Set threshold to large negative value. Then, everything is
	classified as c2, hence misclassification count equals nc1.
      */
      threshold = -FLT_MAX;
      cmc = nix1;
      mc = nix1;
      /*
	Set threshold between each two neighboring log-likelihood ratios
	in turn, and adjust misclassification count.
      */
      ct = threshold;
      for (i = 0; i < nix1+nix2-1; i++)
	{
	  nxct = 0.5*(llr[i].llr+llr[i+1].llr);
#if 0
	  if ((fdbg != (FILE *) 0) && (lix == 8) && (category == CLASS_C2))
	    fprintf(fdbg, "find_optimal_threshold(); lix: %5d; threshold: %12.5g; nxct: %12.5g; llr[%5d]: "
		    "%12.5g; llr[%5d]: %12.5g\n", lix+1, threshold, nxct, 
		    i, llr[i].llr, i+1, llr[i+1].llr);
#endif
	  /*
	    Vector i, and all vectors having the same llr as i, may have
	    changed classification from c2 to c1. If that is the case,
	    update error count.
	  */
	  if (nxct != ct)
	    {
	      done = 0;
	      for (j = i; (j >= 0) && (done == 0); j--)
		{
		  if (llr[j].llr == llr[i].llr)
		    {
		      if (llr[j].category == CLASS_C1)
			mc -= 1;
		      else
			mc += 1;
		    }
		  else
		    done = 1;
		}
	    }
	  /*
	    If this is the best result so far, update the optimal
	    threshold.
	  */
	  if (mc < cmc)
	    {
	      cmc = mc;
	      threshold = nxct;
	    }
	  ct = nxct;
	}
      free(llr);
    }
  else if (fdbg != (FILE *) 0)
    {
      err_msg = strerror(errno);
      fprintf(fdbg, "find_optimal_threshold(); malloc() failure; %s\n", err_msg);
    }
  return threshold;
}

/*
  Calculate optimal thresholds for all pairs of classes.

  In case of malloc() error, return (float **) 0 and set errno.
*/
static float **get_optimal_thresholds(int nv, int d, int c, int *nd,
				      float *det, int kmin, int kmax, 
				      float **neighbors, FILE *fdbg)
{
  int status = 0;
  int length;
  int k;
  int c1;
  int c2;
  int cix;
  int cix1;
  int cix2;
  int idx;
  float threshold;
  float **optimal_thresholds = (float **) 0;
  
  length = (c-1)*c;
  length = length/2;
  idx = 0;
  optimal_thresholds = malloc((kmax-kmin+1)*sizeof(float *));
  if (optimal_thresholds != (float **) 0)
    {
      for (k = 0; (k <= kmax-kmin) && (status == 0); k++)
	{
	  cix = 0;
	  optimal_thresholds[k] = malloc(length*sizeof(float));
	  if (optimal_thresholds[k] != (float *) 0)
	    {
	      cix1 = 0; /* cumulative cardinality of classes 0..c1-1 */
	      for (c1 = 0; c1 < c-1; c1++)
		{
		  cix2 = cix1+nd[c1]; /* cumulative cardinality of classes 0..c2-1 */
		  for (c2 = c1+1; c2 < c; c2++)
		    {
		      threshold = 
			find_optimal_threshold(d, nd[c1], nd[c2], det[2*c1], det[2*c1+1],
					       det[2*c2], det[2*c2+1], &neighbors[c1][idx+cix1],
					       &neighbors[c1][idx+cix2], &neighbors[c2][idx+cix1], 
					       &neighbors[c2][idx+cix2], 
					       IDX_NONE, CLASS_NONE, fdbg);
		      if (fdbg != (FILE *) 0)
			fprintf(fdbg, "get_optimal_thresholds(); c1: %5d; c2: %5d; "
				"k: %5d; threshold: %12.5g\n", c1+1, c2+1, k+1, threshold);
		      optimal_thresholds[k][cix++] = threshold;
		      cix2 += nd[c2];
		    }
		  cix1 += nd[c1];
		}
	    }
	  else
	    {
	      status = -1;
	      optimal_thresholds = (float **) mx_free((void **) optimal_thresholds, k);
	    }
	  idx += nv;
	}
    }
  return optimal_thresholds;
}

static void log_thresholds(FILE *fdbg, char *method, float **optimal_thresholds, 
			   int kmin, int kmax, int c)
{
  int cix;
  int k;
  int c1;
  int c2;

  if (fdbg != (FILE *) 0)
    {
      for (k = kmin; k <= kmax; k++)
	{
	  cix = 0;
	  for (c1 = 0; c1 < c-1; c1++)
	    for (c2 = c1+1; c2 < c; c2++)
	      {
		fprintf(fdbg, "%s; optimal_thresholds[%d][%d]: %12.5g\n",
			method, k, cix, optimal_thresholds[k-kmin][cix]);
		cix++;
	      }
	}
    }
}

/*
  Calculate k-NN Bayes error leave-one-out estimate for set of 'nv'
  vectors in 'x'. The vectors are 'd'-dimensional and the number of
  classes is 'c', with 'nd[i]' vectors in class i and
  covariance/determinant information in 'sigma' and 'det',
  respectively. The estimates are calculated for k between 'kmin' and
  'kmax', inclusive. 'distances' are pairwise distances between all
  vectors in 'x'. 'neighbors' are resubstitution neighbors. 'fpn' has
  FN (false negatives) per class. Send messages to 'outdev', debug
  information to 'fdbg'.

  In case of error, return (float *) 0.
*/
static float *l_estimate(float **x, int nv, int d, int c, int *nd,
			 float **sigma, float *det, int kmin, int kmax,
			 float **mean_distances, float **distances, 
			 float **neighbors, int approximate_l_mode, 
			 float ***fpn, FILE *outdev, FILE *fdbg)
{
  int ivx;
  int k;
  int c1;
  int c2;
  int assigned_class = 0; /* assigned class for vector 'i' */
  int true_class; /* true class for vector 'i' */
  int class_count; /* total number of vectors in classes 0..true_class */
  int offx; /* offset of first vector in class true_class */
  int idx;
  int icx1;
  int icx2;
  int cix;
  int rx;
  float llr;
  float threshold;
  float d1;
  float d2;
  float edet;
  float *xmean;
  float *class_mean = (float *) 0;
  float *xmult;
  float *c1_c1_neighbors;
  float *c1_c2_neighbors;
  float *c2_c1_neighbors;
  float *c2_c2_neighbors;
  float *class_neighbors;
  float *estimate = (float *) 0;
  float **xpn;
  float **optimal_thresholds;
  hash_t *eliminated_classes;
  
  true_class = 0;
  class_neighbors = neighbors[0]; /* initialize */
  class_count = nd[0];
  offx = 0;
  optimal_thresholds = get_optimal_thresholds(nv, d, c, nd, det, kmin, kmax,
					      neighbors, fdbg);
  if (optimal_thresholds != (float **) 0)
    {
      log_thresholds(fdbg, "l_estimate()", optimal_thresholds, kmin, kmax, c);
      ee_init(c, kmin, kmax, fpn, &estimate);
      xpn = *fpn;
      xmean = malloc(d*sizeof(float)); /* store difference X-M; see R&D vol. III, 05/01/2002 */
      if (xmean != (float *) 0)
	{
	  xmult = malloc(d*sizeof(float)); /* store product (X-M)*sigma; see R&D vol. III, 05/01/2002 */
	  if (xmult != (float *) 0)
	    {
	      class_mean = mest(&x[offx], d, nd[true_class]);
	      for (ivx = 0; (ivx < nv) && (class_neighbors != (float *) 0); ivx++)
		{
		  if (outdev != (FILE *) 0)
		    fprintf(outdev, "Classifying vector %10d, for all k.\n", ivx+1);
		  if (ivx == class_count)
		    {
		      vx_free(class_mean);
		      offx += nd[true_class];
		      true_class++;
		      class_mean = mest(&x[offx], d, nd[true_class]);
		      class_count += nd[true_class];
		    }
		  fvec_diff(x[ivx], class_mean, xmean, d, (int *) 0);
		  fvec_smx(xmean, sigma[true_class], d, xmult);
		  if (r_mode == 1)
		    class_neighbors = neighbors[true_class];
		  else
		    class_neighbors = 
		      recalculate_class_neighbors(kmin, kmax, x, d, nv, distances,
						  xmult, mean_distances[true_class][ivx],
						  true_class, c, nd, ivx, approximate_l_mode, fdbg);
		  if (class_neighbors != (float *) 0)
		    {
		      edet = recalculate_determinant(det[2*true_class], det[2*true_class+1], 
						     d, nd[true_class], ivx, 
						     mean_distances[true_class][ivx], 
						     approximate_l_mode);
		      if (fdbg != (FILE *) 0)
			fprintf(fdbg, "l_estimate(); true_class: %5d; det: %12.5g; edet: %12.5g; L edet: %12.5g\n", 
				true_class+1, det[2*true_class], det[2*true_class+1], edet);
		      for (k = 0; k <= kmax-kmin; k++)
			{
			  idx = k*nv;
			  eliminated_classes = hashCreate();
			  cix = 0;
			  for (c1 = 0; c1 < c-1; c1++)
			    {
			      icx1 = get_cardinality(c1, nd);
			      for (c2 = c1+1; c2 < c; c2++)
				{
				  if ((hashGetInt(eliminated_classes, c1) == INT_MAX) &&
				      (hashGetInt(eliminated_classes, c2) == INT_MAX))
				    {
				      icx2 = get_cardinality(c2, nd);
				      /* 
					 'threshold' is optimal threshold for classes
					 'c1', 'c2', excluding vector 'ivx'.
				      */
				      if (true_class == c1)
					{
					  c1_c1_neighbors = &(class_neighbors[idx+icx1]);
					  c2_c1_neighbors = &(class_neighbors[idx+icx2]);
					  /*
					    TBD: perhaps precompute and store
					    cardinalities, for minor
					    speedup. 08/29/2001
					  */
					  rx = k*nv+get_cardinality(c1, nd);
					  c1_c2_neighbors = &neighbors[c2][rx];
					  rx = k*nv+get_cardinality(c2, nd);
					  c2_c2_neighbors = &neighbors[c2][rx];
					  threshold = 
					    find_optimal_threshold(d, nd[c1], nd[c2], 1.0, edet,
								   det[2*c2], det[2*c2+1],
								   c1_c1_neighbors, c2_c1_neighbors, 
								   c1_c2_neighbors, c2_c2_neighbors, 
								   ivx-get_cardinality(true_class, nd),
								   CLASS_C1, fdbg);
					  d1 = c1_c1_neighbors[ivx-icx1];
					  d2 = c1_c2_neighbors[ivx-icx1];
					  if ((r_mode == 1) || (approximate_l_mode == 1))
					    assigned_class = assign_class(d, c1, c2, nd[c1], nd[c2], d1, d2,
									  1.0, edet, det[2*c2], det[2*c2+1], 
									  threshold);
					  else
					    assigned_class = assign_class(d, c1, c2, nd[c1]-1, nd[c2], d1, d2,
									  1.0, edet, det[2*c2], det[2*c2+1], 
									  threshold);
					}
				      else if (true_class == c2)
					{
					  c1_c2_neighbors = &(class_neighbors[idx+icx1]);
					  c2_c2_neighbors = &(class_neighbors[idx+icx2]);
					  rx = k*nv+get_cardinality(c1, nd);
					  c1_c1_neighbors = &neighbors[c1][rx];
					  rx = k*nv+get_cardinality(c2, nd);
					  c2_c1_neighbors = &neighbors[c1][rx];
					  threshold = 
					    find_optimal_threshold(d, nd[c1], nd[c2], det[2*c1], 
								   det[2*c1+1], 1.0, edet,
								   c1_c1_neighbors, c2_c1_neighbors, 
								   c1_c2_neighbors, c2_c2_neighbors,
								   ivx-get_cardinality(true_class, nd), 
								   CLASS_C2, fdbg);
					  d1 = c2_c1_neighbors[ivx-icx2];
					  d2 = c2_c2_neighbors[ivx-icx2];
					  
					  if ((r_mode == 1) || (approximate_l_mode == 1))
					    assigned_class = assign_class(d, c1, c2, nd[c1], nd[c2], d1, d2,
									  det[2*c1], det[2*c1+1], 1.0, edet,
									  threshold);
					  else
					    assigned_class = assign_class(d, c1, c2, nd[c1], nd[c2]-1, d1, d2,
									  det[2*c1], det[2*c1+1], 1.0, edet,
									  threshold);
					}
				      else
					{
					  /*
					    If 'ivx' does not belong to either of the
					    two classes being classified against, then
					    use the optimized threshold value.
					  */
					  d1 = neighbors[c1][idx+ivx];
					  d2 = neighbors[c2][idx+ivx];
					  threshold = optimal_thresholds[k][cix];
					  assigned_class = assign_class(d, c1, c2, nd[c1], nd[c2], d1, d2,
									det[2*c1], det[2*c1+1], det[2*c2],
									det[2*c2+1], threshold);
					}
				      if (fdbg != (FILE *) 0)
					{
					  llr = calculate_llr(d, nd[c1], nd[c2], d1, d2,
							      det[2*c1], det[2*c1+1], det[2*c2],
							      det[2*c2+1]);
					  fprintf(fdbg, "l_estimate(); ivx: %4d; class: %3d; "
						  "c1: %3d; c2: %3d; k: %5d; d1: %10.5g; d2: %10.5g; "
						  "llr: %11.5g; thd: %11.5g; ac: %3d\n", 
						  ivx+1, true_class+1, c1+1, c2+1, k+1, d1, d2, 
						  llr, threshold, assigned_class+1);
					}
				      if (assigned_class == c1)
					hashPutInt(eliminated_classes, c2, c2);
				      else
					hashPutInt(eliminated_classes, c1, c1);
				    }
				  cix++;
				}
			    }
			  if (assigned_class != true_class)
			    {
			      estimate[k] += 1.0;
			      xpn[true_class][k] += 1.0;
			      if (fdbg != (FILE *) 0)
				fprintf(fdbg, "l_estimate(); misclassified ivx: %5d; k: %5d; "
					"true_class: %3d; assigned_class: %3d\n", 
					ivx+1, k+1, true_class+1, assigned_class+1);
			    }
			  hashDelete(eliminated_classes);
			}
		      if (r_mode != 1)
			free(class_neighbors);
		    }
		  else
		    estimate = (float *) 0;
		}
	      mx_free((void **) optimal_thresholds, kmax-kmin+1);
	      vx_free(xmult);
	      vx_free(class_mean);
	    }
	}
      vx_free(xmean);
    }
  return estimate;
}

/*
  Resubstitution Bayes error estimate. The method uses threshold
  optimization as described in Fukunaga, section 7.4, paragraph 'The
  threshold for non-normal distributions'.
*/
static float *r_estimate(float **x, int nv, int d, int c, int *nd,
			 float *det, int kmin, int kmax, 
			 float **distances, float **neighbors,
			 float ***fpn, FILE *outdev, FILE *fdbg)
{
  int i;
  int k;
  int c1;
  int c2;
  int assigned_class = 0; /* assigned class for vector 'i' */
  int true_class; /* true class for vector 'i' */
  int class_count; /* total number of vectors in classes 0..true_class */
  int cix;
  int idx;
  float threshold;
  float d1;
  float d2;
  float llr;
  float *estimate;
  float **xpn;
  float **optimal_thresholds;
  hash_t *eliminated_classes;
  
  if (outdev != (FILE *) 0)
    fprintf(outdev, "Calculating resubstitution error...");
  true_class = 0;
  class_count = nd[0];
  /*
    Calculate optimal threshold for each pair of classes.
  */
  optimal_thresholds = get_optimal_thresholds(nv, d, c, nd, det, kmin, kmax,
					      neighbors, fdbg);
  log_thresholds(fdbg, "r_estimate()", optimal_thresholds, kmin, kmax, c);
  ee_init(c, kmin, kmax, fpn, &estimate);
  xpn = *fpn;
  for (i = 0; i < nv; i++)
    {
      if (i == class_count)
	class_count += nd[++true_class];
      for (k = 0; k <= kmax-kmin; k++)
	{
	  eliminated_classes = hashCreate();
	  idx = k*nv+i;
	  cix = 0;
	  for (c1 = 0; c1 < c-1; c1++)
	    for (c2 = c1+1; c2 < c; c2++)
	      {
		if ((hashGetInt(eliminated_classes, c1) == INT_MAX) &&
		    (hashGetInt(eliminated_classes, c2) == INT_MAX))
		  {
		    d1 = neighbors[c1][idx];
		    d2 = neighbors[c2][idx];
		    threshold = optimal_thresholds[k][cix];
		    

		    assigned_class = assign_class(d, c1, c2, nd[c1], nd[c2], d1, d2,
						  det[2*c1], det[2*c1+1], det[2*c2],
						  det[2*c2+1], threshold);
		    if (fdbg != (FILE *) 0)
		      {
			llr = calculate_llr(d, nd[c1], nd[c2], d1, d2,
					    det[2*c1], det[2*c1+1], det[2*c2],
					    det[2*c2+1]);
			fprintf(fdbg, "r_estimate(); ivx: %4d; class: %3d; c1: %3d; c2: %3d; "
				"k: %4d; d1: %10.5g; d2: %10.5g; det: %10.5g; edet: %10.5g; "
				"llr: %11.5g; thd: %10.5g; ac: %3d\n", i+1, true_class+1, 
				c1+1, c2+1, k+1, d1, d2, det[2*true_class], 
				det[2*true_class+1], llr, threshold, assigned_class+1);
		      }
		    if (assigned_class == c1)
		      hashPutInt(eliminated_classes, c2, c2);
		    else
		      hashPutInt(eliminated_classes, c1, c1);
		  }
		cix++;
	      }
	  if (assigned_class != true_class)
	    {
	      estimate[k] += 1.0;
	      xpn[true_class][k] += 1.0;
	      if (fdbg != (FILE *) 0)
		fprintf(fdbg, "r_estimate(); misclassified ivx: %5d; k: %5d; "
			"true_class: %3d; assigned_class: %3d\n", 
			i+1, k+1, true_class+1, assigned_class+1);
	    }
	  hashDelete(eliminated_classes);
	}
    }
  for (k = 0; k <= kmax-kmin; k++)
    free(optimal_thresholds[k]);
  free(optimal_thresholds);
  if (outdev != (FILE *) 0)
    fprintf(outdev, " done.\n");
  return estimate;
}

/*
  Save Bayes error estimate 'bayes_error' and misclassification rate
  per class 'fpn', produced by 'method', for k between 'kmin' and
  'kmax' in 'fname'. Return 0 for success, -1 in case of failure and
  set errno.
*/
static int save_estimate(char *method, float *bayes_error, float **fpn, 
			 int c, int kmin, int kmax, char *fname)
{
  int retval = 0;
  int j;
  int k;
  FILE *fptr;
  
  /*
    If file exists, append to it, otherwise create it.
  */
  fptr = fopen(fname, "a");
  if (fptr == (FILE *) 0)
    retval = -1;
  else
    {
      for (k = 0; k < kmax-kmin+1; k++)
	{
	  fprintf(fptr, "%s\t%5d\t%10.2f", method, k+kmin, bayes_error[k]);
	  for (j = 0; j < c; j++)
	    fprintf(fptr, "\t%10.2f", fpn[j][k]);
	  fprintf(fptr, "\n");
	}
      fclose(fptr);
    }
  return retval;
}

/*
  Launch Bayes error estimation for TDS. 

  'errc' is output error code. If 'debug' is not 0, send debugging
  information to 'pcp.dbg' file.

  For 'approximate_l_mode' == 1, use approximate L mode.
*/
void p_bayes(int *errc, int approximate_l_mode, int debug)
{
  int   i;
  int   j;
  int   kmin;
  int   kmax;
  int   status;
  int   idx;
  int   knn;
  int   dflt;
  int   mink;
  int   maxk;
  float factor;
  char  *fname;
  char  *msg;
  float *det;
  float *bayes_l;
  float *bayes_r;
  float **distances;
  float **mean_distances;
  float **neighbors;
  float **fpn;
  FILE  *outdev;
  FILE  *fdbg;
  unsigned int seed;

  clear_screen();
  fname = (char *) 0;
  if ((r_mode == 1) && (approximate_l_mode == 1))
    r_mode = 0;
  *errc = 0;
  fdbg = (FILE *) 0;
  if (debug)
    fdbg = fopen(PCP_DBG, "a");
  outdev = stdout;
  dflt = 1;
  mink = 1;
  maxk = ivec_min(tds->nd, tds->c)-1;
  cursor_on();
  kmin = input_integer(stdin, outdev, " First nearest neighbor [1]:", PCP_QLEN, &dflt, &mink, &maxk);
  mink = kmin;
  msg = malloc((PCP_QLEN+1)*sizeof(char));
  snprintf(msg, PCP_QLEN+1, " Last nearest neighbor [%d]:", maxk);
  dflt = maxk;
  kmax = input_integer(stdin, outdev, msg, PCP_QLEN, &dflt, &mink, &maxk);
  free(msg);
  seed = input_seed(stdin, outdev);
  cursor_off();
  inverse_video();
  knn = kmax-kmin+1;
  status = dataset_sigma(tds, errc);
  if (!status)
    {
      det = malloc(2*tds->c*sizeof(float));
      if (det)
	{
	  factor = log(2.0);
	  idx = 0;
	  for (i = 0; i < tds->c; i++)
	    {
	      det[idx] = tds->det[idx];
	      idx++;
	      det[idx] = tds->det[idx]/factor;
	      idx++;
	    }
	  distances = calculate_distances(tds->x, tds->nv, tds->d, tds->c, tds->nd, 
					  tds->sigma, outdev);
	  neighbors = calculate_neighbors(distances, tds->nv, kmin, kmax, tds->c, 
					  tds->nd, outdev, fdbg);
	  mean_distances = calculate_mean_distances(tds->x, tds->nv, tds->d, tds->c, 
						    tds->nd, tds->sigma);
	  bayes_r = r_estimate(tds->x, tds->nv, tds->d, tds->c, tds->nd, det, kmin, 
			       kmax, distances, neighbors, &fpn, outdev, fdbg);
	  /*
	    'fpn' is class-conditional error rate, calculated as FN/CC (false
	    negatives/class cardinality), according to equation (2.1) from
	    B. D. Ripley, 'Pattern Recognition and Neural Networks', Cambridge
	    University Press, Cambridge, 1996, section 2.1.
	  */
	  if (bayes_r)
	    {
	      for (i = 0; i < knn; i++)
		{
		  bayes_r[i] = 100.0*(bayes_r[i]/tds->nv);
		  for (j = 0; j < tds->c; j++)
		    fpn[j][i] = 100.0*(fpn[j][i]/tds->nd[j]);
		}
	    }
	  fname = strdup(PCP_BEE);
	  unlink(fname);
	  status = save_estimate(RESUBSTITUTION, bayes_r, fpn, tds->c, kmin, kmax, fname);
	  ee_free(tds->c, fpn, bayes_r);
	  bayes_l = l_estimate(tds->x, tds->nv, tds->d, tds->c, tds->nd, tds->sigma,
			       det, kmin, kmax, mean_distances, distances, neighbors, 
			       approximate_l_mode, &fpn, outdev, fdbg);
	  if (bayes_l)
	    {
	      for (i = 0; i < knn; i++)
		{
		  bayes_l[i] = 100.0*(bayes_l[i]/tds->nv);
		  for (j = 0; j < tds->c; j++)
		    fpn[j][i] = 100.0*(fpn[j][i]/tds->nd[j]);
		}
	      status = save_estimate(LEAVE_ONE_OUT, bayes_l, fpn, tds->c, kmin, kmax, fname);
	      ee_free(tds->c, fpn, bayes_l);
	    }
	  else
	    *errc = 101; /* TBD: what is this error code? */
	  vx_free(det);
	}
      else
	*errc = errno;
    }
  free(fname);
}

static int p_disp_alloc(int nl, float **rval, float **lval)
{
  int status = 0;

  *rval = malloc(nl*sizeof(float));
  if (rval)
    {
      *lval = malloc(nl*sizeof(float));
      if (!lval)
	{
	  status = -1;
	  vx_free(*rval);
	}
    }
  else
    status = -1;
  return status;
}

/*
  Display Bayes error estimation results.
*/
void p_disp_bayes(int *errc, char **xname)
{
  int   i;
  int   nl;
  int   llen;
  int   cntr;
  int   rcntr;
  int   lcntr;
  int   status;
  int   kmin;
  char  *fname;
  char  *line;
  float *rval;
  float *lval;
  char  **tokens;
  FILE  *fptr;

  status = 0;
  clear_screen();
  cursor_on();
  fname = input_filename(PCP_UMSG_FILENAME, PCP_BEE, stdout);
  inverse_video();
  kmin = 0;
  nl = file_info(fname, &llen, (int *) 0, '\0');
  rcntr = 0;
  lcntr = 0;
  cntr = 0;
  if (nl >= 0)
    { 
      line = malloc((llen+2)*sizeof(char));
      status = p_disp_alloc(nl, &rval, &lval);
      if (!status)
	{
	  fptr = fopen(fname, "r");
	  if (fptr)
	    {
	      while (fgets(line, llen+2, fptr))
		{
		  tokens = str_tokenize(line, WHITESPACE);
		  if (cntr == 0)
		    kmin = atoi(tokens[1]);
		  if (!strcmp(tokens[0], RESUBSTITUTION))
		    rval[rcntr++] = atof(tokens[2]);
		  else if (!strcmp(tokens[0], LEAVE_ONE_OUT))
		    lval[lcntr++] = atof(tokens[2]);
		  str_free(tokens);
		  cntr++;
		}
	      free(line);
	      status = fclose(fptr);
	      if (status == -1)
		{
		  *errc = errno;
		  *xname = strdup(fname);
		}
	      else
		{
		  if (lcntr == rcntr)
		    {
		      printf("%s", PCP_XLINE);
		      printf("%s", PCP_EMPTY_LINE);
		      printf("|                B A Y E S   E R R O R   E S T I M A T I O N                 |\n");
		      printf("%s", PCP_EMPTY_LINE);
		      printf("%s", PCP_XLINE);
		      printf("|      k     | Leave-one-out error estimate  | Resubstitution error estimate |\n");
		      printf("%s", PCP_XLINE);
		      for (i = 0; i < rcntr; i++)
			{
			  printf("| %10d |                         %5.2f |                         %5.2f |\n", 
				 kmin+i, lval[i], rval[i]);
			}
		      printf("%s", PCP_XLINE);
		      pwait();
		    }
		  else
		    *errc = PERR_BAD_INPUT_FILE;
		}
	    }
	  else
	    {
	      *errc = errno;
	      *xname = strdup(fname);
	    }
	  vx_free(rval);
	  vx_free(lval);
	}
      else
	*errc = errno;
    }
  else
    {
      *errc = errno;
      *xname = strdup(fname);
    }
  free(fname);
}

/*
  Calculate leave-one-out k-NN estimate of Bayes error for set of 'nv'
  vectors in 'x'. The vectors are 'd'-dimensional and the number of
  classes is 'c', with 'nd[i]' vectors in class i. 'sigma[i]' is
  covariance matrix of class i in SSM. 'det[i]' is natural log of the
  determinant of the covariance matrix. The estimates are calculated
  for k between 'kmin' and 'kmax', inclusive. Send messages to
  'outdev', debug information to 'fdbg'.

  In case of error, return (float *) 0 and set 'errc'.  
*/
float *L_error(float **x, int nv, int d, int c, int *nd,
	       float **sigma, float *det, int kmin, int kmax,
	       FILE *outdev, FILE *fdbg, int *errc)
{
  int   i;
  int   status;
  int   idx;
  int   approximate_l_mode = 1;
  float efactor;
  float *det2;
  float *bayes_l = (float *) 0;
  float **distances;
  float **mean_distances;
  float **neighbors;
  float **fpn;

  status = 0;
  /*
    Convert 'det' to the format required by l_estimate().
  */
  if (status == 0)
    {
      det2 = malloc(c*2*sizeof(float));
      if (det2 != (float *) 0)
	{
	  idx = 0;
	  efactor = 1.0/log(2.0);
	  for (i = 0; i < c; i++)
	    {
	      det2[idx++] = 1.0;
	      det2[idx++] = det[i]*efactor;
	    }
	}
      else
	status = -1;
    }
  if (status == 0)
    {
      distances = calculate_distances(x, nv, d, c, nd, sigma, outdev);
      if (distances != (float **) 0)
	{
	  neighbors = calculate_neighbors(distances, nv, kmin, kmax, c, nd, outdev, fdbg);
	  if (neighbors != (float **) 0)
	    {
	      mean_distances = calculate_mean_distances(x, nv, d, c, nd, sigma);
	      if (mean_distances != (float **) 0)
		{
		  bayes_l = l_estimate(x, nv, d, c, nd, sigma, det2, kmin, kmax, 
				       mean_distances, distances, neighbors, 
				       approximate_l_mode, &fpn, outdev, fdbg);
		  if (bayes_l != (float *) 0)
		    {
		      for (i = 0; i < kmax-kmin+1; i++)
			bayes_l[i] = 100.0*(bayes_l[i]/nv);
		    }
		  mx_free((void **) mean_distances, c);
		}
	      else
		*errc = errno;
	      mx_free((void **) neighbors, c);
	    }
	  else
	    *errc = errno;
	  mx_free((void **) distances, nv);
	}
      else
	*errc = errno;
    }
  else
    *errc = errno;
  free(det2);
  return bayes_l;
}

/*
  Calculate inverted covariance matrices of `c' classes in `x' in SSM
  format.
*/
float **calc_sig(float **x, int c, int d, int *nd, float **det, int *errc)
{
  int   status;
  int   i;
  int   offset;
  float dsign;
  float dexp;
  float **sig;
  float **sigma;
  float **inverse; /* inverted covariance matrix for one class */
  float *x_det;      /* array of natural log of determinants */

  status = 0;
  sig = (float **) 0;
  x_det = malloc(c*sizeof(float));
  if (x_det)
    {
      sig = calloc(c, sizeof(float *));
      if (sig)
	{
	  /*
	    Estimate and invert covariance matrices in the transformed space.
	  */
	  offset = 0;
	  for (i = 0; (i < c) && !status; i++)
	    {
	      sigma = cest(&x[offset], d, nd[i], COVARIANCE_MATRIX);
	      /*
		Invert. 
	      */
	      inverse = fmx_inv(sigma, d, &dsign, &dexp, errc);
	      mx_free((void **) sigma, d);
	      if (!inverse)
		{
		  status = -1;
		  sig = (float **) mx_free((void **) sig, d);
		}
	      else
		{
		  /*
		    Convert 'sigma' to SSM, as required by L_error().
		  */
		  sig[i] = fmx_ssm(inverse, d);
		  mx_free((void **) inverse, d);
		  if (!sig[i])
		    {
		      sig = (float **) mx_free((void **) sig, i);
		      status = -1;
		      *errc = errno;
		    }
		  else
		    x_det[i] = dexp;
		}
	      offset += nd[i];
	    }
	  if (!status && det)
	    *det = x_det;
	}
    }
  return sig;
}
syntax highlighted by Code2HTML, v. 0.9.1