/*
  File name: svd.c
  Created by: Ljubomir Buturovic
  Created: 02/22/2002
  Purpose: Singular Value Decomposition (SVD) mapping for
  high-dimensional data.
*/

/*
  Copyright 2004 Ljubomir J. Buturovic

  Permission is hereby granted, free of charge, to any person
  obtaining a copy of this software and associated documentation files
  (the "Software"), to deal in the Software without restriction,
  including without limitation the rights to use, copy, modify, merge,
  publish, distribute, sublicense, and/or sell copies of the Software,
  and to permit persons to whom the Software is furnished to do so,
  subject to the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
*/

/*
  The principal function in this file is svd(). It implements linear
  transformation of the input data using Singular Value Decomposition
  (SVD) of a covariance matrix. This algorithm is a specialized
  version of Karhunen-Loeve transform (also known as principal
  component analysis) for high-dimensional data. It calculates
  transformation matrix 'u' corresponding to 'idr' biggest eigenvalues
  of the covariance matrix of the input matrix data matrix by
  performing the eigenanalysis in the 'nvec' dimensional space,
  instead of in the 'd'-dimensional space of the input vectors. The
  function should be used when the number of available vectors is
  lower than the dimension (length) of the vectors. In such cases,
  svd() is more efficient than the standard approach (of performing
  the analysis in the original 'd'-dimensional space).
*/

static char rcsid[] = "$Id: svd.c,v 1.52 2006/04/21 07:09:42 ljubomir Exp $";

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <errno.h>
#include "pcp.h"
#include "lmat.h"
#include "lau.h"
#include "pau.h"
#include "bagging.h"
#include "adaboost.h"

/*
  Log MX^T*MX (transposed matrix 'mx' times itself). Used to verify
  eigenvector matrices.
*/
static void log_mxt_mx(float **mx, int rows, int columns, FILE *fdbg)
{
  int i;
  int j;
  int k;
  double s;
  float *amx;

  if (fdbg)
    {
      amx = malloc(columns*sizeof(float));
      for (i = 0; i < columns; i++)
	{
	  for (j = 0; j < columns; j++)
	    {
	      s = 0.0;
	      for (k = 0; k < rows; k++)
		s += mx[k][i]*mx[k][j];
	      amx[j] = s;
	    }
	  fprintf(fdbg, "mxt*mx row %d\n", i);
	  for (j = 0; j < columns; j++)
	    fprintf(fdbg, "%12.5g ", amx[j]);
	  fprintf(fdbg, "\n");
	}
      free(amx);
    }
}

/*
  Calculate Singular Value Decomposition (SVD) mapping (a specialized
  version of Karhunen-Loeve transform) for 'nvec' 'd'-dimensional
  input vectors 'x'.  Return 'nvec' by 'd' transformation matrix 'u',
  where 'nvec' is the number of input vectors in 'x'. Input vectors
  x[0][0..d-1], x[1][0..d-1], x[2][0..d-1], ..., x[nvec-1][0..d-1] are
  assumed to be such that d << nvec (otherwise use the regular KL
  transform).

  Return -1 in case of error and set 'errc'. Errors are malloc()
  errors and LERR_ITMAX in eigen().

  The code follows the logic of the article Orly Alter, Patrick
  O. Brown, and David Botstein, 'Singular value decomposition for
  genome-wide expression data processing and modeling', Proceedings of
  the National Academy of Sciences, vol 97, no. 18, August 29, 2000,
  pp. 10101-10106, section 'Mathematical Framework: Singular Value
  Decomposition'.  

*/
static int svd(float **xmx, int nvec, int d, float ***u, FILE *fdbg, int *errc)
{
  int    i;
  int    j;
  int    k;
  int    status;
  int    use_correlation = 0;
  float  xfc;
  double s;
  float  *xmean;
  float  **sigma;
  float  **umx;
  float  *evl;
  float  **evx;

  /*
    For use_correlation == 1, use correlation matrix to calculate u
    (as in the original reference); otherwise, use the covariance
    matrix, as is commonly done.
   */
  xmean = mest(xmx, d, nvec);
  if (use_correlation == 1)
    {
      for (i = 0; i < d; i++)
	xmean[i] = 0.0;
      sigma = cest(xmx, d, nvec, CORRELATION_MATRIX);
    }
  else
    sigma = cest(xmx, d, nvec, COVARIANCE_MATRIX);
  if (sigma && xmean)
    {
      status = eigsn_column(sigma, d, &evl, &evx, errc);
      if (status == 0)
	{
	  if (fdbg)
	    {
	      for (i = 0; i < d; i++)
		{
		  fprintf(fdbg, "svd(); evl[%d]: %f\n", i, evl[i]);
		  fprintf(fdbg, "svd(); ");
		  for (j = 0; j < d; j++)
		    fprintf(fdbg, "evx[%d][%d]: %f; ", j, i, evx[j][i]);
		  fprintf(fdbg, "\n");
		}
	      fflush(fdbg);
	      fprintf(fdbg, "eigenvectors ev^T * ev:\n");
	      log_mxt_mx(evx, d, d, fdbg);
	    }
	  /*
	    Now calculate umx as amx*evx*sqrt(evl^(-1)). amx is
	    (xmx-xmean)/sqrt(nvec-1) (see R&D, vol. III, 02/25/2002).
	    
	    First multiply each eigenvector with its'
	    eigenvalue^(-1/2).
	  */
	  for (j = 0; j < d; j++)
	    {
	      /*
		Ill-conditioned covariance matrices may generate
		negative eigenvalues. Set them to 0.

		TBD: examine this.
	      */
	      if (evl[j] > 0)
		evl[j] = sqrt(1.0/evl[j]);
	      else
		evl[j] = 0.0;
	    }
	  if (fdbg)
	    {
	      fprintf(fdbg, "sqrt(eps^(-1)):\n");
	      for (j = 0; j < d; j++)
		fprintf(fdbg, "%g ", evl[j]);
	      fprintf(fdbg, "\n");
	    }
	  for (i = 0; i < d; i++) /* eigenvector index */
	    for (j = 0; j < d; j++)
	      evx[j][i] = evx[j][i]*evl[i];
	  if (fdbg)
	    {
	      fprintf(fdbg, "ev * eps^(-1):\n");
	      log_mx(evx, d, d, fdbg);
	    }
	  umx = fmx_alloc(nvec, d);
	  if (umx)
	    {
	      /*
		Now multiply result with xmx-xmean.
	      */
	      if (use_correlation == 1)
		xfc = 1.0;
	      else
		{
		  xfc = sqrt(nvec-1);
		  xfc = 1.0/xfc;
		}
	      for (i = 0; i < nvec; i++)
		for (j = 0; j < d; j++)
		  {
		    s = 0.0;
		    for (k = 0; k < d; k++)
		      s += (xmx[i][k]-xmean[k])*evx[k][j];
		    umx[i][j] = s*xfc;
		  }
	      if (fdbg)
		{
		  fprintf(fdbg, "transformation matrix u:\n");
		  log_mx(umx, nvec, d, fdbg);
		  fprintf(fdbg, "transformation matrix u^T * u:\n");
		  log_mxt_mx(umx, nvec, d, fdbg);
		}
	    }
	  else 
	    status = -1;
	  if (fdbg)
	    {
	      correst(sigma, d);
	      fprintf(fdbg, "matrix of linear correlation coefficients:\n");
	      log_mx(sigma, d, d, fdbg);
	    }
	  /*
	    Free sigma and xmean.
	  */
	  mx_free((void **) sigma, d);
	  free(xmean);
	  *u = umx;
	  vx_free(evl);
	  mx_free((void **) evx, d);
	}  
    }
  else
    {
      status = -1;
      *errc = errno;
    }
  return status;
}

/*
  Interface to svd(), a linear transformation of high-dimensional
  vectors using Singular Value Decomposition (SVD) of a covariance
  matrix (a variant of Karhunen-Loeve transform).

  In case of error, set 'errc'. In case of file error, set the file
  name in 'xname'.

  Error codes: 1. this function assumes that the rows of 'input' are
  high-dimensional, that is typically nvec << d. If nvec >= d, set
  'errc' to PERR_INC_DIM.
*/
void p_svd(int *errc, char **xname, int dbg)
{
  int    itd;
  int    status;
  int    mode;
  int    nsamples;
  char   *fname;
  float  **xmx;
  float  **umx;
  float  **svdx;
  FILE   *outdev;
  FILE   *fdbg = (FILE *) 0;

  status = 0;
  mode = 0; /* 0: use training and test datasets to compute SVD */
            /* 1: use training dataset to compute SVD */
  if (tds)
    {
      if (teds)
	{
	  clear_screen();
	  cursor_on();
	  mode = input_transform_mode();
	}
      nsamples = tds->nv;
      if (mode == 0)
	nsamples += teds->nv;
      /*
	This transformation is executed only if number of samples is
	less or equal than dimension.
      */
      if (nsamples > tds->d)
	*errc = PERR_INCONSISTENT_SVD;
      else
	{
	  outdev = stdout;
	  if (dbg > 0)
	    fdbg = fopen(PCP_DBG, "a");
	  if (!teds)
	    {
	      clear_screen();
	      cursor_on();
	    }
	  itd = input_d(stdin, outdev, tds->d, nsamples);
	  fname = input_filename(PMSG_LIN_OUTPUT_FNAME, PCP_LIN, outdev);
	  cursor_off();
	  print_line(outdev, PCP_UMSG_SVD, PCP_QLEN);
	  if (mode == 1)
	    svdx = tds->x;
	  else
	    {
	      svdx = combine_x(tds->x, tds->nv, teds->x, teds->nv);
	      if (!svdx)
		{
		  status = -1;
		  *errc = errno;
		}
	    }
	  if (!status)
	    {
	      xmx = fmx_transpose(svdx, nsamples, tds->d);
	      if (mode == 0)
		vx_free(svdx);
	      status = svd(xmx, tds->d, nsamples, &umx, fdbg, errc);
	      /*
		After svd(), xmx is d by nsamples, umx is nsamples by
		d.
	      */
	      mx_free((void **) xmx, tds->d);
	      if (status == 0)
		{
		  status = fmx_save(umx, tds->d, itd, fname, 1);
		  mx_free((void **) umx, tds->d);
		  if (status == -1)
		    {
		      *errc = errno;
		      *xname = strdup(fname);
		    }
		}
	    }
	}
    }
  else
    *errc = PERR_UNDEFINED_TDS;
}

/*
  Feature extraction using SVD. For input 'nv' by 'd' data matrix 'x',
  returns linear transformation matrix 'd' by 'nv'. It is assumed that
  'nv' is <= 'd'.

  If nv is > d, return NULL. In case of failure to iterate in eigen(),
  return NULL and set errc to LERR_ITMAX.  In case of malloc() error,
  return NULL and set errc to the corresponding errno.
*/
float **svd_transform(float **x, int nv, int d, int *errc)
{
  int   status;
  float **svd_transformation;
  float **umx;
  float **xmx;

  svd_transformation = (float **) 0;
  if (nv <= d)
    {
      /*
	TBD: the two transposes are a kludge. Instead, svd() should
	take nv by d input.
       */
      xmx = fmx_transpose(x, nv, d);
      if (xmx)
	{
	  status = svd(xmx, d, nv, &umx, (FILE *) 0, errc);
	  if (!status)
	    {
	      /*
		xmx and umx are d by nv.
	      */
	      svd_transformation = fmx_transpose(umx, d, nv);
	      /*
		svd_transformation is nv by d.
	      */
	      if (!svd_transformation)
		*errc = errno;
	      mx_free((void **) umx, d);
	    }
	  mx_free((void **) xmx, d);
	}
      else
	*errc = errno;
    }
  return svd_transformation;
}



syntax highlighted by Code2HTML, v. 0.9.1