/*
File name: emap.c
Created by: Ljubomir Buturovic
Created: 04/14/2002
Purpose: linear dimension reduction using E-mapping.
*/
/*
Copyright 2004 Ljubomir J. Buturovic
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation files
(the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
static char rcsid[] = "$Id: emap.c,v 1.88 2005/06/18 07:36:18 ljubomir Exp $";
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <float.h>
#include "emap.h"
#include "dataset.h"
#include "lau.h"
#include "pau.h"
#include "lmat.h"
#include "simplex.h"
#include "cda.h"
#include "estimate_error.h"
struct emap_crit_parameters
{
struct dataset *dset;
int lxd; /* dimension of the transformed space */
int kmin;
int kmax;
int vid; /* vertex ID */
float eval; /* the lowest function value so far */
char *fname; /* file to store resulting transformation */
FILE *outdev; /* destination for output messages */
FILE *fdbg; /* destination for debug messages */
};
/*
Calculate criterion function for E-mapping. Takes input dataset
'dset' and transformation matrix 'umx' and returns average
leave-one-out Bayes error estimate in the transformed space,
calculated for k between 'kmin' and 'kmax'. Dimensions of 'umx' are
'lxd' by 'd', where 'lxd' is dimension of transformed space.
In case of failure, return -1 and set 'errc'. Possible error codes:
ENOMEM; LERR_SINGCOV for singular covariance matrix.
*/
static float emap_crit(struct dataset *dset, float **umx, int lxd, int kmin, int kmax,
int *errc, FILE *outdev, FILE *fdbg)
{
int i;
int status;
int knn;
int offset;
float avg_bayes;
float dsign;
float dexp;
float **y;
float **sigma; /* covariance matrix for one class, in full storage mode */
float **sig; /* covariance matrices for all classes in SSM */
float **inverse; /* inverted covariance matrix for one class */
float *det; /* array of natural log of determinants */
float *bayes_l;
double dsum;
avg_bayes = -1.0;
status = 0;
*errc = 0;
/*
Transform input vectors.
*/
y = fmx_mult(dset->x, dset->nv, dset->d, umx, lxd, 1);
if (y)
{
det = malloc(dset->c*sizeof(float));
if (det)
{
sig = calloc(dset->c, sizeof(float *));
if (sig)
{
/*
Estimate and invert covariance matrices in the transformed space.
*/
offset = 0;
for (i = 0; (i < dset->c) && !status; i++)
{
sigma = cest(&y[offset], lxd, dset->nd[i], COVARIANCE_MATRIX);
/*
Invert.
*/
inverse = fmx_inv(sigma, lxd, &dsign, &dexp, errc);
mx_free((void **) sigma, lxd);
if (!inverse)
status = -1;
else
{
/*
Convert 'sigma' to SSM, as required by L_error().
*/
sig[i] = fmx_ssm(inverse, lxd);
mx_free((void **) inverse, lxd);
if (!sig[i])
{
sig = (float **) mx_free((void **) sig, i);
status = -1;
*errc = errno;
}
else
{
det[i] = dexp;
if (fdbg)
fprintf(fdbg, "emap_crit(); class %d determinant: %g\n", i, dexp);
}
}
offset += dset->nd[i];
}
if (!status)
{
/*
Estimate Bayes error in the transformed space.
*/
bayes_l = L_error(y, dset->nv, lxd, dset->c, dset->nd,
sig, det, kmin, kmax, outdev, fdbg, errc);
/*
Return average.
*/
if (*errc == 0)
{
dsum = 0.0;
knn = kmax-kmin+1;
for (i = 0; i < knn; i++)
dsum += bayes_l[i];
avg_bayes = dsum/knn;
if (fdbg)
fprintf(fdbg, "emap_crit(); Bayes in %d-dimensional space: %g\n",
lxd, avg_bayes);
}
else
status = -1;
vx_free(bayes_l);
}
sig = (float **) mx_free((void **) sig, dset->c);
}
else
*errc = errno;
vx_free(det);
}
else
*errc = errno;
mx_free((void **) y, dset->nv);
}
else
*errc = errno;
return avg_bayes;
}
static int save_mapping(float **umx, int lxd, int d, float ecrit, char *fname)
{
int status = 0;
int i;
int j;
float *vector;
FILE *fptr;
if (fname)
{
fptr = fopen(fname, "w");
if (fptr)
{
for (i = 0; i < lxd; i++)
{
vector = fvec_normalize(umx[i], d);
if (vector)
{
for (j = 0; j < d; j++)
fprintf(fptr, "%14.7g\t", vector[j]);
fprintf(fptr, "\n");
vx_free(vector);
}
else
status = -1;
}
fclose(fptr);
}
else
status = -1;
}
return status;
}
#define EMAP_FUNC_MSG " Bayes error estimate, mean value: "
/*
Compute value of EMAP criterion function for the transformation vector
'fvec'. The vector is array representation of the transformation
matrix which maps parameters->dset to parameters->d-dimensional space.
The argument 'n' is not used - it is calculated from values
contained in 'parameters'. But we have to accept it, since that is
the required signature for the simplex() criterion function.
The function converts 'fvec' to a matrix format, which is then passed
to the actual criterion computation function emap_crit(), and saves
results.
In case of error, set 'errc'. The errors are malloc() errors and
LERR_SINGCOV for singular covariance matrix.
*/
static float emap_func(float *fvec, int n, int iteration, void *parameters, int *errc)
{
int i;
int idx;
int lxd;
int kmin;
int kmax;
int status;
int vid;
float ecrit;
char *fname;
FILE *outdev;
FILE *fdbg;
char *msg;
float **umx;
struct dataset *dset;
struct emap_crit_parameters *emap_parameters;
ecrit = -1.0;
emap_parameters = (struct emap_crit_parameters *) parameters;
dset = emap_parameters->dset;
lxd = emap_parameters->lxd;
kmin = emap_parameters->kmin;
kmax = emap_parameters->kmax;
vid = emap_parameters->vid;
outdev = emap_parameters->outdev;
fdbg = emap_parameters->fdbg;
fname = emap_parameters->fname;
status = 0;
/*
'umx' is matrix-version of 'fvec'.
*/
umx = malloc(lxd*sizeof(float *));
if (umx)
{
idx = 0;
for (i = 0; i < lxd; i++)
{
umx[i] = &fvec[idx];
idx += dset->d;
}
ecrit = emap_crit(dset, umx, lxd, kmin, kmax, errc, (FILE *) 0, fdbg);
if (ecrit < 0)
status = -1;
else
{
/*
Only save the mapping if the criterion function value is
less than the current minimum eval.
*/
/*if ((iteration > 0) && (ecrit < emap_parameters->eval))*/
{
emap_parameters->eval = ecrit;
status = save_mapping(umx, lxd, dset->d, ecrit, fname);
if (status == -1)
*errc = errno;
}
}
vx_free(umx);
}
else
*errc = errno;
if (!*errc)
{
msg = malloc(1000);
if (msg)
{
if (iteration == 0)
sprintf(msg, "Iteration %7d; vertex %7d; %s %7.2f", iteration, vid,
EMAP_FUNC_MSG, ecrit);
else
sprintf(msg, "Iteration %7d; %s %7.2f", iteration, EMAP_FUNC_MSG, ecrit);
print_line(outdev, msg, PCP_QLEN);
}
else
*errc = errno;
free(msg);
}
return ecrit;
}
/*
Return the chosen dimension reduction method used to generate
starting point for emap(). The methods are those listed in file
pcp.h.
*/
static int input_init_method(FILE *outdev, int lxd, int nv, int d, int c)
{
int method;
int default_method;
int len;
int *methods;
char *msg;
len = 0;
methods = malloc(100*sizeof(int)); /* this should be more than we will ever need */
msg = malloc((PCP_QLEN+1)*sizeof(char));
if ((c == 2) && (nv <= d))
{
len = 3;
methods[0] = PDR_SVD;
methods[1] = PDR_RANDOM;
methods[2] = PDR_FILE;
default_method = PDR_SVD;
sprintf(msg, PCP_UMSG_EMAP_S1, PDR_SVD, PDR_RANDOM, PDR_FILE, default_method);
}
else if ((c == 2) && (nv > d))
{
/*
For Fisher's linear discriminant, the allowed values of
mapping space dimension are <= c-1.
*/
if (lxd <= c-1)
{
len = 4;
methods[0] = PDR_PCA;
methods[1] = PDR_FISHER;
methods[2] = PDR_RANDOM;
methods[3] = PDR_FILE;
default_method = PDR_PCA;
sprintf(msg, PCP_UMSG_EMAP_S2, PDR_FISHER, PDR_PCA, PDR_RANDOM, PDR_FILE, default_method);
}
else
{
len = 3;
methods[0] = PDR_PCA;
methods[1] = PDR_RANDOM;
methods[2] = PDR_FILE;
default_method = PDR_PCA;
sprintf(msg, PCP_UMSG_EMAP_S3, PDR_PCA, PDR_RANDOM, PDR_FILE, default_method);
}
}
else if ((c > 2) && (nv <= d))
{
len = 4;
methods[0] = PDR_SVD;
methods[1] = PDR_RANDOM;
methods[2] = PDR_FILE;
default_method = PDR_SVD;
sprintf(msg, PCP_UMSG_EMAP_S1, PDR_SVD, PDR_RANDOM, PDR_FILE, default_method);
}
else if ((c > 2) && (nv > d))
{
if (lxd <= c-1)
{
len = 4;
methods[0] = PDR_PCA;
methods[1] = PDR_FISHER;
methods[2] = PDR_RANDOM;
methods[3] = PDR_FILE;
default_method = PDR_PCA;
sprintf(msg, PCP_UMSG_EMAP_S2, PDR_FISHER, PDR_PCA, PDR_RANDOM, PDR_FILE, default_method);
}
else
{
len = 3;
methods[0] = PDR_PCA;
methods[1] = PDR_RANDOM;
methods[2] = PDR_FILE;
default_method = PDR_PCA;
sprintf(msg, PCP_UMSG_EMAP_S3, PDR_PCA, PDR_RANDOM, PDR_FILE, default_method);
}
}
method = input_choice(stdin, outdev, msg, &default_method, methods, len);
free(msg);
free(methods);
return method;
}
/*
Convert matrix 'mx' ('rows' by 'columns') into vector.
*/
static float *fvec_mx(float **mx, int rows, int columns, int *errc)
{
int i;
int j;
int idx;
float *fvec;
fvec = malloc(rows*columns*sizeof(float));
if (fvec)
{
idx = 0;
for (i = 0; i < rows; i++)
for (j = 0; j < columns; j++)
fvec[idx++] = mx[i][j];
}
else
*errc = errno;
return fvec;
}
/*
Return initial transformation of 'dset' from original space to 'lxd'
space, to be used as a starting point for optimization. The
transformation is read from a file or generated pseudo-randomly. If
it is read from a file, return file name in 'fname'.
The transformation is represented as a one-dimensional vector of
length d times lxd, as is required for the simplex() optimization
function.
In case of error, set 'errc'. In case of file error, return name of
offending file in 'fname'.
*/
static float *init_vertex(FILE *indev, FILE *outdev, int lxd, struct dataset *dset,
char **fname, int *errc)
{
int i;
int dc;
int status;
int nx;
float min;
float max;
char *xname;
float *umx; /* initial vertex in vector form */
float **mapping;
float **y; /* temp. var, used to store mapping read from a file */
umx = (float *) 0;
status = input_init_method(outdev, lxd, dset->nv, dset->d, dset->c);
if (status != EOF)
{
if (status == PDR_FILE)
{
xname = input_filename(PCP_UMSG_INIT_VERTEX, (char *) 0, outdev);
if (xname && *xname)
{
nx = file_info(xname, (int *) 0, &dc, '\0');
if ((nx > 0) && (dc > 0))
{
if (nx >= lxd)
{
y = fmx_alloc(lxd, dc);
if (y)
{
status = load_file(xname, y, LAU_FF_RAW, (char **) 0, (char **) 0,
(char **) 0, dc, lxd, errc);
if (status == -1)
{
*fname = strdup(xname);
if (!*fname)
*errc = errno;
}
else
{
umx = fvec_mx(y, lxd, dc, errc);
mx_free((void **) y, lxd);
if (!umx)
*errc = errno;
}
}
else
*errc = errno;
}
else
{
*fname = strdup(xname);
*errc = PERR_INC_DIM;
}
}
else
{
*fname = strdup(xname);
if (nx < 0)
*errc = errno;
else if (dc == -1)
*errc = PERR_BAD_INPUT_FILE;
}
}
else
{
*errc = PERR_BAD_INPUT_FILE;
if (xname && !(*xname))
*fname = strdup("");
}
}
else if (status == PDR_RANDOM)
{
min = 0.0; /* TBD: hardcoded. */
max = 1.0;
umx = malloc(lxd*(dset->d)*sizeof(float));
if (umx)
{
/*rand_float_seed(&seed);*/
for (i = 0; i < lxd*(dset->d); i++)
umx[i] = float_rand()*(max-min)+min;
}
else
*errc = errno;
}
else if ((status == PDR_SVD) || (status == PDR_FISHER) || (status == PDR_PCA))
{
if (status == PDR_SVD)
mapping = svd_transform(dset->x, dset->nv, dset->d, errc);
else if (status == PDR_FISHER)
mapping = fld(dset, errc);
else
mapping = pca(dset, errc);
if (mapping)
{
umx = fvec_mx(mapping, lxd, dset->d, errc);
if (!umx)
*errc = errno;
if (status == PDR_SVD)
mx_free((void **) mapping, dset->nv);
else
mx_free((void **) mapping, lxd);
}
}
}
else
*errc = PERR_BAD_INPUT;
if (*errc != 0)
{
free(umx);
umx = (float *) 0;
}
return umx;
}
/*
Implementation of EMAP functionality (linear feature transformation
which minimizes Bayes error estimate in the transformed space). The
function computes the transformation matrix using the training data
set, and saves it in a file.
In case of error, set 'errc'. If 'errc' is file access error,
'xname' is name of file associated with the error. If 'dbg' is > 0,
send debugging information to debug file.
*/
void p_emap(int *errc, char **xname, int dbg)
{
int i;
int j;
int status;
int lxd;
int ndim;
int seed;
int min_range;
int max_range;
int int_default;
int kmin;
int kmax;
int itmax;
int iter;
float lambda;
float ftol = 1e-6;
float val;
float *smx0;
char *fname = (char *) 0;
char *emap_fname;
char *msg;
float **smx;
float *fval;
FILE *outdev;
FILE *fdbg = (FILE *) 0;
struct emap_crit_parameters *emap_parameters;
*errc = 0;
if (dbg > 0)
fdbg = fopen(PCP_DBG, "a");
outdev = stdout;
clear_screen();
cursor_on();
emap_parameters = (struct emap_crit_parameters *) 0;
if (*errc == 0)
{
min_range = 1;
max_range = tds->d;
msg = malloc((PCP_QLEN+1)*sizeof(char));
sprintf(msg, PCP_UMSG_DIM, min_range, max_range);
lxd = input_integer(stdin, outdev, msg, PCP_QLEN, (int *) 0, &min_range,
&max_range);
free(msg);
emap_fname = input_filename(PMSG_LIN_OUTPUT_FNAME, PCP_EMP, stdout);
/*
The error estimate is calculated for k between 'kmin' and
'kmax', inclusive.
*/
min_range = 1;
max_range = ivec_min(tds->nd, tds->c)-1;
kmin = input_integer(stdin, outdev, PCP_UMSG_KMIN, PCP_QLEN, &min_range, &min_range, &max_range);
msg = malloc((PCP_QLEN+1)*sizeof(char));
int_default = max_range/2;
sprintf(msg, PCP_UMSG_KMAX, kmin, max_range, int_default);
kmax = input_integer(stdin, outdev, msg, PCP_QLEN, &int_default, &kmin,
&max_range);
free(msg);
seed = input_seed(stdin, outdev);
min_range = 1;
int_default = EMAP_ITMAX; /* default max. number of iterations */
msg = malloc((PCP_QLEN+1)*sizeof(char));
sprintf(msg, PCP_UMSG_MAXIT, int_default);
itmax = input_integer(stdin, outdev, msg, PCP_QLEN, &int_default, &min_range, (int *) 0);
free(msg);
ndim = lxd*(tds->d);
/*
Parameters ready, start processing.
First create initial simplex 'smx': each row represents
one vertex of the simplex. 'smx' has a total of ndim+1
rows, one per vertex.
*/
status = smplx_alloc(&fval, &smx, ndim);
if (!status)
{
/*
Create initial vertex.
*/
smx0 = init_vertex(stdin, outdev, lxd, tds, &fname, errc);
fvec_copy(smx[0], smx0, ndim);
free(smx0);
if (*errc == 0)
{
emap_parameters = malloc(sizeof(struct emap_crit_parameters));
emap_parameters->dset = tds;
emap_parameters->lxd = lxd;
emap_parameters->kmin = kmin;
emap_parameters->kmax = kmax;
emap_parameters->outdev = outdev;
emap_parameters->fdbg = fdbg;
emap_parameters->vid = 0;
emap_parameters->fname = emap_fname;
emap_parameters->eval = FLT_MAX;
fval[0] = emap_func(smx[0], 0, 0, emap_parameters, errc);
if (*errc == 0)
{
/*
Initialize the remaining ndim vertices of the
simplex, using equation (10.4.1) from Numerical
Recipes in C.
*/
lambda = 1.0; /* TBD: hardcoded for now. */
for (i = 1; (i < ndim+1) && (*errc == 0); i++)
{
for (j = 0; j < ndim; j++)
smx[i][j] = smx[0][j];
smx[i][i-1] += lambda;
emap_parameters->vid = i;
fval[i] = emap_func(smx[i], 0, 0, emap_parameters, errc);
}
if (*errc == 0)
{
val = simplex(smx, ndim, fval, emap_func, (void *) emap_parameters,
itmax, ftol, &iter, errc);
mx_free((void **) smx, ndim+1);
vx_free(fval);
if (*errc == 0)
{
msg = malloc(1000);
sprintf(msg, "Optimization completed in %7d iterations; minimum: %7.2f.",
iter, val);
print_line(outdev, msg, PCP_QLEN);
free(msg);
pwait();
}
}
}
else
*xname = strdup(emap_fname);
vx_free((void *) emap_parameters);
}
else if (fname)
*xname = strdup(fname);
}
else
*errc = errno;
}
cursor_off();
}
/*
Compute EMAP transformation matrix for dataset 'dset' into
'dr'-dimensional space. 'kmin' and 'kmax' are parameters used for
the Bayes error estimation.
The function uses PCA (Principal Component Analysis) as initial
point for the simplex optimization if number of input vectors is
greater than dimensionality; otherwise it uses SVD (Singular Value
Decomposition). The max. number of iterations is fixed to
EMAP_MAX_ITER.
In case of success, return the transformation matrix. In case of
failure, return NULL and set 'errc'. The errors are EINVAL for bad
arguments, memory allocation errors,
*/
float **emap(struct dataset *dset, int dr, int kmin, int kmax, int *errc)
{
int i;
int j;
int status;
int iter;
int ndim;
int nv;
int d;
int idx;
float val;
float lambda;
float ftol;
float *fval;
float *smx0;
float **smx;
float **mx;
float **map;
struct emap_crit_parameters *emap_parameters;
map = (float **) 0;
if (dset && errc && (kmin >= 1) && (kmax >= 1) && (kmax >= kmin))
{
ftol = 1e-6;
d = dset->d;
nv = dset->nv;
ndim = dr*d;
status = smplx_alloc(&fval, &smx, ndim);
if (!status)
{
/*
Create initial vertex.
*/
if (nv <= d)
mx = svd_transform(dset->x, nv, d, errc);
else
mx = pca(dset, errc);
if (mx)
{
smx0 = fvec_mx(mx, dr, d, errc);
if (smx0)
{
fvec_copy(smx[0], smx0, ndim);
if (nv <= d)
mx_free((void **) mx, nv);
else
mx_free((void **) mx, dr);
free(smx0);
if (*errc == 0)
{
emap_parameters = malloc(sizeof(struct emap_crit_parameters));
emap_parameters->dset = dset;
emap_parameters->lxd = dr;
emap_parameters->kmin = kmin;
emap_parameters->kmax = kmax;
emap_parameters->outdev = (FILE *) 0;
emap_parameters->fdbg = (FILE *) 0;
emap_parameters->vid = 0;
emap_parameters->fname = (char *) 0;
emap_parameters->eval = FLT_MAX;
fval[0] = emap_func(smx[0], 0, 0, emap_parameters, errc);
if (*errc == 0)
{
/*
Initialize the remaining ndim vertices of the
simplex, using equation (10.4.1) from Numerical
Recipes in C.
*/
lambda = 1.0; /* TBD: hardcoded for now. */
for (i = 1; (i < ndim+1) && (*errc == 0); i++)
{
for (j = 0; j < ndim; j++)
smx[i][j] = smx[0][j];
smx[i][i-1] += lambda;
emap_parameters->vid = i;
fval[i] = emap_func(smx[i], 0, 0, emap_parameters, errc);
}
if (*errc == 0)
{
val = simplex(smx, ndim, fval, emap_func, (void *) emap_parameters,
EMAP_ITMAX, ftol, &iter, errc);
idx = fvec_argmin(fval, ndim+1);
map = fmx_fvec(smx[idx], ndim, dr, d);
mx_free((void **) smx, ndim+1);
}
}
}
}
}
}
else if (errc)
*errc = EINVAL;
}
return map;
}
syntax highlighted by Code2HTML, v. 0.9.1