/*
File name: svd.c
Created by: Ljubomir Buturovic
Created: 02/22/2002
Purpose: Singular Value Decomposition (SVD) mapping for
high-dimensional data.
*/
/*
Copyright 2004 Ljubomir J. Buturovic
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation files
(the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
/*
The principal function in this file is svd(). It implements linear
transformation of the input data using Singular Value Decomposition
(SVD) of a covariance matrix. This algorithm is a specialized
version of Karhunen-Loeve transform (also known as principal
component analysis) for high-dimensional data. It calculates
transformation matrix 'u' corresponding to 'idr' biggest eigenvalues
of the covariance matrix of the input matrix data matrix by
performing the eigenanalysis in the 'nvec' dimensional space,
instead of in the 'd'-dimensional space of the input vectors. The
function should be used when the number of available vectors is
lower than the dimension (length) of the vectors. In such cases,
svd() is more efficient than the standard approach (of performing
the analysis in the original 'd'-dimensional space).
*/
static char rcsid[] = "$Id: svd.c,v 1.52 2006/04/21 07:09:42 ljubomir Exp $";
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <errno.h>
#include "pcp.h"
#include "lmat.h"
#include "lau.h"
#include "pau.h"
#include "bagging.h"
#include "adaboost.h"
/*
Log MX^T*MX (transposed matrix 'mx' times itself). Used to verify
eigenvector matrices.
*/
static void log_mxt_mx(float **mx, int rows, int columns, FILE *fdbg)
{
int i;
int j;
int k;
double s;
float *amx;
if (fdbg)
{
amx = malloc(columns*sizeof(float));
for (i = 0; i < columns; i++)
{
for (j = 0; j < columns; j++)
{
s = 0.0;
for (k = 0; k < rows; k++)
s += mx[k][i]*mx[k][j];
amx[j] = s;
}
fprintf(fdbg, "mxt*mx row %d\n", i);
for (j = 0; j < columns; j++)
fprintf(fdbg, "%12.5g ", amx[j]);
fprintf(fdbg, "\n");
}
free(amx);
}
}
/*
Calculate Singular Value Decomposition (SVD) mapping (a specialized
version of Karhunen-Loeve transform) for 'nvec' 'd'-dimensional
input vectors 'x'. Return 'nvec' by 'd' transformation matrix 'u',
where 'nvec' is the number of input vectors in 'x'. Input vectors
x[0][0..d-1], x[1][0..d-1], x[2][0..d-1], ..., x[nvec-1][0..d-1] are
assumed to be such that d << nvec (otherwise use the regular KL
transform).
Return -1 in case of error and set 'errc'. Errors are malloc()
errors and LERR_ITMAX in eigen().
The code follows the logic of the article Orly Alter, Patrick
O. Brown, and David Botstein, 'Singular value decomposition for
genome-wide expression data processing and modeling', Proceedings of
the National Academy of Sciences, vol 97, no. 18, August 29, 2000,
pp. 10101-10106, section 'Mathematical Framework: Singular Value
Decomposition'.
*/
static int svd(float **xmx, int nvec, int d, float ***u, FILE *fdbg, int *errc)
{
int i;
int j;
int k;
int status;
int use_correlation = 0;
float xfc;
double s;
float *xmean;
float **sigma;
float **umx;
float *evl;
float **evx;
/*
For use_correlation == 1, use correlation matrix to calculate u
(as in the original reference); otherwise, use the covariance
matrix, as is commonly done.
*/
xmean = mest(xmx, d, nvec);
if (use_correlation == 1)
{
for (i = 0; i < d; i++)
xmean[i] = 0.0;
sigma = cest(xmx, d, nvec, CORRELATION_MATRIX);
}
else
sigma = cest(xmx, d, nvec, COVARIANCE_MATRIX);
if (sigma && xmean)
{
status = eigsn_column(sigma, d, &evl, &evx, errc);
if (status == 0)
{
if (fdbg)
{
for (i = 0; i < d; i++)
{
fprintf(fdbg, "svd(); evl[%d]: %f\n", i, evl[i]);
fprintf(fdbg, "svd(); ");
for (j = 0; j < d; j++)
fprintf(fdbg, "evx[%d][%d]: %f; ", j, i, evx[j][i]);
fprintf(fdbg, "\n");
}
fflush(fdbg);
fprintf(fdbg, "eigenvectors ev^T * ev:\n");
log_mxt_mx(evx, d, d, fdbg);
}
/*
Now calculate umx as amx*evx*sqrt(evl^(-1)). amx is
(xmx-xmean)/sqrt(nvec-1) (see R&D, vol. III, 02/25/2002).
First multiply each eigenvector with its'
eigenvalue^(-1/2).
*/
for (j = 0; j < d; j++)
{
/*
Ill-conditioned covariance matrices may generate
negative eigenvalues. Set them to 0.
TBD: examine this.
*/
if (evl[j] > 0)
evl[j] = sqrt(1.0/evl[j]);
else
evl[j] = 0.0;
}
if (fdbg)
{
fprintf(fdbg, "sqrt(eps^(-1)):\n");
for (j = 0; j < d; j++)
fprintf(fdbg, "%g ", evl[j]);
fprintf(fdbg, "\n");
}
for (i = 0; i < d; i++) /* eigenvector index */
for (j = 0; j < d; j++)
evx[j][i] = evx[j][i]*evl[i];
if (fdbg)
{
fprintf(fdbg, "ev * eps^(-1):\n");
log_mx(evx, d, d, fdbg);
}
umx = fmx_alloc(nvec, d);
if (umx)
{
/*
Now multiply result with xmx-xmean.
*/
if (use_correlation == 1)
xfc = 1.0;
else
{
xfc = sqrt(nvec-1);
xfc = 1.0/xfc;
}
for (i = 0; i < nvec; i++)
for (j = 0; j < d; j++)
{
s = 0.0;
for (k = 0; k < d; k++)
s += (xmx[i][k]-xmean[k])*evx[k][j];
umx[i][j] = s*xfc;
}
if (fdbg)
{
fprintf(fdbg, "transformation matrix u:\n");
log_mx(umx, nvec, d, fdbg);
fprintf(fdbg, "transformation matrix u^T * u:\n");
log_mxt_mx(umx, nvec, d, fdbg);
}
}
else
status = -1;
if (fdbg)
{
correst(sigma, d);
fprintf(fdbg, "matrix of linear correlation coefficients:\n");
log_mx(sigma, d, d, fdbg);
}
/*
Free sigma and xmean.
*/
mx_free((void **) sigma, d);
free(xmean);
*u = umx;
vx_free(evl);
mx_free((void **) evx, d);
}
}
else
{
status = -1;
*errc = errno;
}
return status;
}
/*
Interface to svd(), a linear transformation of high-dimensional
vectors using Singular Value Decomposition (SVD) of a covariance
matrix (a variant of Karhunen-Loeve transform).
In case of error, set 'errc'. In case of file error, set the file
name in 'xname'.
Error codes: 1. this function assumes that the rows of 'input' are
high-dimensional, that is typically nvec << d. If nvec >= d, set
'errc' to PERR_INC_DIM.
*/
void p_svd(int *errc, char **xname, int dbg)
{
int itd;
int status;
int mode;
int nsamples;
char *fname;
float **xmx;
float **umx;
float **svdx;
FILE *outdev;
FILE *fdbg = (FILE *) 0;
status = 0;
mode = 0; /* 0: use training and test datasets to compute SVD */
/* 1: use training dataset to compute SVD */
if (tds)
{
if (teds)
{
clear_screen();
cursor_on();
mode = input_transform_mode();
}
nsamples = tds->nv;
if (mode == 0)
nsamples += teds->nv;
/*
This transformation is executed only if number of samples is
less or equal than dimension.
*/
if (nsamples > tds->d)
*errc = PERR_INCONSISTENT_SVD;
else
{
outdev = stdout;
if (dbg > 0)
fdbg = fopen(PCP_DBG, "a");
if (!teds)
{
clear_screen();
cursor_on();
}
itd = input_d(stdin, outdev, tds->d, nsamples);
fname = input_filename(PMSG_LIN_OUTPUT_FNAME, PCP_LIN, outdev);
cursor_off();
print_line(outdev, PCP_UMSG_SVD, PCP_QLEN);
if (mode == 1)
svdx = tds->x;
else
{
svdx = combine_x(tds->x, tds->nv, teds->x, teds->nv);
if (!svdx)
{
status = -1;
*errc = errno;
}
}
if (!status)
{
xmx = fmx_transpose(svdx, nsamples, tds->d);
if (mode == 0)
vx_free(svdx);
status = svd(xmx, tds->d, nsamples, &umx, fdbg, errc);
/*
After svd(), xmx is d by nsamples, umx is nsamples by
d.
*/
mx_free((void **) xmx, tds->d);
if (status == 0)
{
status = fmx_save(umx, tds->d, itd, fname, 1);
mx_free((void **) umx, tds->d);
if (status == -1)
{
*errc = errno;
*xname = strdup(fname);
}
}
}
}
}
else
*errc = PERR_UNDEFINED_TDS;
}
/*
Feature extraction using SVD. For input 'nv' by 'd' data matrix 'x',
returns linear transformation matrix 'd' by 'nv'. It is assumed that
'nv' is <= 'd'.
If nv is > d, return NULL. In case of failure to iterate in eigen(),
return NULL and set errc to LERR_ITMAX. In case of malloc() error,
return NULL and set errc to the corresponding errno.
*/
float **svd_transform(float **x, int nv, int d, int *errc)
{
int status;
float **svd_transformation;
float **umx;
float **xmx;
svd_transformation = (float **) 0;
if (nv <= d)
{
/*
TBD: the two transposes are a kludge. Instead, svd() should
take nv by d input.
*/
xmx = fmx_transpose(x, nv, d);
if (xmx)
{
status = svd(xmx, d, nv, &umx, (FILE *) 0, errc);
if (!status)
{
/*
xmx and umx are d by nv.
*/
svd_transformation = fmx_transpose(umx, d, nv);
/*
svd_transformation is nv by d.
*/
if (!svd_transformation)
*errc = errno;
mx_free((void **) umx, d);
}
mx_free((void **) xmx, d);
}
else
*errc = errno;
}
return svd_transformation;
}
syntax highlighted by Code2HTML, v. 0.9.1