/*
  Module name: lmat.c
  Created by: Ljubomir Buturovic
  Created: 04/19/2002
  Purpose: matrix utilities.
*/

/*
  Copyright 2004 Ljubomir J. Buturovic

  Permission is hereby granted, free of charge, to any person
  obtaining a copy of this software and associated documentation files
  (the "Software"), to deal in the Software without restriction,
  including without limitation the rights to use, copy, modify, merge,
  publish, distribute, sublicense, and/or sell copies of the Software,
  and to permit persons to whom the Software is furnished to do so,
  subject to the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
*/

#include <stdlib.h>
#include <errno.h>
#include <math.h>
#include "lerr.h"
#include "lau.h"
#include "lmat.h"

static char rcsid[] = "$Id: lmat.c,v 1.58 2006/05/21 04:03:17 ljubomir Exp $";

/*
  Estimate mean given 'nvec' 'd'-dimensional input vectors in 'x'. If
  'x' is (float **) 0, or 'nvec' < 1, or 'd' < 1, or in case of
  malloc() error, returns (float **) 0, and sets errno in the latter
  case.
*/
float *mest(float **x, int d, int nvec)
{
  int i;
  int j;
  float *xmean;
  double s;
  
  xmean = (float *) 0;
  if ((x != (float **) 0) && (d > 0) && (nvec >= 1))
    {
      /*
	Estimate means.
      */
      xmean = calloc(d, sizeof(float));
      if (xmean != (float *) 0)
	{
	  for (i = 0; i < d; i++)
	    {
	      s = 0.0;
	      for (j = 0; j < nvec; j++)
		s += x[j][i];
	      xmean[i] = s/nvec;
	    }
	}
    }
  return xmean;
}

/*
  Estimate covariance ('imx' == COVARIANCE_MATRIX) or correlation
  matrix ('imx' == CORRELATION_MATRIX) for 'nvec' 'd'-dimensional
  input vectors in 'x'. If 'x' is (float **) 0, or 'nvec' <= 0, or 'd'
  <= 0, or in case of malloc() error, returns (float **) 0, and sets
  errno.
*/
float **cest(float **x, int d, int nvec, int imx)
{
  int i;
  int j;
  int k;
  float *xmean;
  float **sigma;
  double s;

  xmean = (float *) 0;
  sigma = (float **) 0;
  if (x && (d > 0) && (nvec > 0))
    {
      /*
	Estimate means.
      */
      if (imx == COVARIANCE_MATRIX)
	xmean = mest(x, d, nvec);
      sigma = fmx_alloc(d, d);
      if (sigma)
	{
	  /*
	    Estimate covariance/correlation matrix.
	  */
	  for (i = 0; i < d; i++)
	    for (j = i; j < d; j++)
	      {
		s = 0.0;
		if (imx == COVARIANCE_MATRIX)
		  {
		    for (k = 0; k < nvec; k++)
		      s += (x[k][i]-xmean[i])*(x[k][j]-xmean[j]);
		    if (nvec > 1)
		      sigma[i][j] = s/(nvec-1);
		    else
		      sigma[i][j] = s;
		  }
		else
		  {
		    for (k = 0; k < nvec; k++)
		      s += x[k][i]*x[k][j];
		    sigma[i][j] = s;
		  }
	      }
	  for (i = 0; i < d; i++)
	    for (j = 0; j < i; j++)
	      sigma[i][j] = sigma[j][i];
	}
      vx_free(xmean);
    }
  else
    errno = EINVAL;
  return sigma;
}

/*
  Convert covariance matrix 'sigma' to matrix of linear correlation
  coefficients.
*/
int correst(float **sigma, int d)
{
  int status = 0;
  int i;
  int j;
  float *cov_vector;

  if (sigma && (d > 0))
    {
      cov_vector = malloc(d*sizeof(float));
      if (cov_vector)
	{
	  for (i = 0; (i < d) && !status; i++)
	    {
	      /*
		Return if any of the covariances are zero.
	      */
	      if (sigma[i][i] != 0)
		cov_vector[i] = sigma[i][i];
	      else
		status = -1;
	    }
	  if (status != -1)
	    {
	      for (i = 0; i < d; i++)
		for (j = 0; j < d; j++)
		  sigma[i][j] = sigma[i][j]/sqrt(cov_vector[i]*cov_vector[j]);
	    }
	  free(cov_vector);
	}
      else
	status = -1;
    }
  else
    status = -1;
  return status;
}

/*
  Convert symmetric 'matrix' to Symmetric Storage Mode (SSM). Return
  (float *) 0 in case of malloc() error and set errno.
*/
float *fmx_ssm(float **matrix, int nmx)
{
  int   i;
  int   j;
  int   idx;
  float *ssm;

  idx = nmx+1;
  idx = nmx*(nmx+1)/2;
  ssm = malloc(idx*sizeof(float));
  if (ssm != (float *) 0)
    {
      idx = 0;
      for (i = 0; i < nmx; i++)
	for (j = 0; j <= i; j++)
	  ssm[idx++] = matrix[i][j];
    }
  return ssm;
}

/*
  Compute scalar product of vectors 'avec' and 'bvec' of length
  'n'. If 'errc' is not NULL, set 'errc' to -1 in case of illegal
  arguments, otherwise set it to 0.
*/
float fvec_dot(float *avec, float *bvec, int n, int *errc)
{
  int    i;
  float  scalar;
  double dsum;

  scalar = 0.0;
  if (errc)
    *errc = 0;
  if (!avec || !bvec || (n <= 0))
    {
      if (errc)
	*errc = -1;
    }
  else
    {
      dsum = 0.0;
      for (i = 0; i < n; i++)
	dsum += avec[i]*bvec[i];
      scalar = dsum;
    }
  return scalar;
}

/*
  Compute scalar product of vectors 'avec' and 'bvec' of length
  'n'. If 'errc' is not NULL, set 'errc' to -1 in case of illegal
  arguments, otherwise set it to 0.
*/
double dvec_dot(double *avec, double *bvec, int n, int *errc)
{
  int    i;
  double scalar;
  double dsum;
  
  scalar = 0.0;
  if (errc)
    *errc = 0;
  if (!avec || !bvec || (n <= 0))
    {
      if (errc)
	*errc = -1;
    }
  else
    {
      dsum = 0.0;
      for (i = 0; i < n; i++)
	dsum += avec[i]*bvec[i];
      scalar = dsum;
    }
  return scalar;
}

/*
  Set elements of 'vec' to 'value'.
*/
void dvec_set(double *vec, int len, double value)
{
  int i;

  if (vec && len > 0)
    for (i = 0; i < len; i++)
      vec[i] = value;
}

/*
  Copy double vector 'src' into 'dest'.
*/
void fvec_dcopy(float *dest, double *src, int len)
{
  int i;

  if ((len > 0) && dest && src)
    for (i = 0; i < len; i++)
      dest[i] = src[i];
}

/*
  Copy float vector 'src' into 'dest'.
*/
void dvec_fcopy(double *dest, float *src, int len)
{
  int i;

  if ((len > 0) && dest && src)
    for (i = 0; i < len; i++)
      dest[i] = src[i];
}

/*
  Compute difference between vectors 'avec' and 'bvec' of length 'n'
  and store it to 'outvec'. If 'errc' is not NULL, set 'errc' to -1 in
  case of illegal arguments, otherwise set it to 0.
*/
void fvec_diff(float *avec, float *bvec, float *outvec, int n, int *errc)
{
  int i;

  if (errc != (int *) 0)
    *errc = 0;
  if ((avec == (float *) 0) || (bvec == (float *) 0) || (outvec == (float *) 0) || (n <= 0))
    {
      if (errc != (int *) 0)
	*errc = -1;
    }
  else
    {
      for (i = 0; i < n; i++)
	outvec[i] = avec[i]-bvec[i];
    }
}

/*
  Multiply vector 'vector' of length 'd' and symmetric 'matrix'
  d*d. The matrix is stored in IMSL Symmetric Storage Mode, and thus
  the array 'matrix' has d(d+1)/2 elements.  Store result in vector
  'mx' (must be preallocated).
*/
void fvec_smx(float *vector, float *matrix, int d, float *mx)
{
  int k;
  int j;
  int di1;
  int di2;
  double dsum;

  /*
    Outer loop (by columns of 'matrix').
  */
  di1 = 0;
  for (k = 0; k < d; k++)
    {
      /*
	Multiply with elements on the main diagonal and above. 'di1'
	is index of the first element in the column.
      */
      di1 += k;
      dsum = 0.0;
      for (j = 0; j <= k; j++)
	dsum += vector[j]*matrix[di1+j];
      /* 
	 Multiply with elements below the main diagonal, indexed by
	 'di2'.
      */
      di2 = di1+k;
      for (j = k+1; j < d; j++)
	{
	  di2 += j;
	  dsum += vector[j]*matrix[di2];
	}
      mx[k] = dsum;
    }
}

/*
  Multiply vector 'vector' of length 'd' and symmetric 'matrix'
  d*d. The matrix is stored in IMSL Symmetric Storage Mode, and thus
  the array 'matrix' has d(d+1)/2 elements. Return the result (which
  is a vector of length 'd').
  
  In case of error, return NULL and set errno. The errors are EINVAL
  and malloc(() errors.
*/
float *fvec_ssm(float *vector, float *matrix, int d)
{
  float *fvec;

  fvec = (float *) 0;
  if (vector && matrix)
    {
      fvec = malloc(d*sizeof(float));
      if (fvec)
	fvec_smx(vector, matrix, d, fvec);
    }
  return fvec;
}

/*
  Return norm of 'vector' of length 'len'.
*/
float fvec_norm(float *vector, int len)
{
  float norm;
  int   i;
  double dsum = 0.0;
  
  if ((vector != (float *) 0))
    for (i = 0; i < len; i++)
      dsum += vector[i]*vector[i];
  norm = sqrt(dsum);
  return norm;
}

/*
  Return norm of double 'vector' of length 'len'.
*/
double dvec_norm(double *vector, int len)
{
  float norm;
  int   i;
  double dsum = 0.0;
  
  if (vector)
    for (i = 0; i < len; i++)
      dsum += vector[i]*vector[i];
  norm = sqrt(dsum);
  return norm;
}

/*
  Return normalized 'vector' of length 'len' (i.e., the returned
  vector has norm 1). Return (float *) 0 in case of malloc() error and
  set errno.
*/
float *fvec_normalize(float *vector, int len)
{
  int   i;
  float norm;
  float *fvec = (float *) 0;

  norm = fvec_norm(vector, len);
  if (norm > 0.0)
    {
      fvec = malloc(len*sizeof(float));
      if (fvec != (float *) 0)
	for (i = 0; i < len; i++)
	  fvec[i] = vector[i]/norm;
    }
  return fvec;
}

/*
  Return transpose of 'matrix'.
*/
float **fmx_transpose(float **matrix, int rows, int columns)
{
  int   i;
  int   j;
  float **fmx = (float **) 0;

  fmx = fmx_alloc(columns, rows);
  if (fmx)
    {
      for (i = 0; i < rows; i++)
	for (j = 0; j < columns; j++)
	  fmx[j][i] = matrix[i][j];
    }
  return fmx;
}

static int eigsn_init(int imx, int lwork, float **evl, float ***evx, float **a, 
		      float **w, float **work)
{
  int status = 0;

  if (evl)
    {
      *evl = malloc(imx*sizeof(float)); 
      if (!(*evl))
	status = -1;
    }
  if (!status)
    {
      if (evx)
	*evx = fmx_alloc(imx, imx);
      if (!evx || (*evx && evx))
	{
	  *a = malloc(imx*imx*sizeof(float));
	  if (*a)
	    {
	      *w = malloc(imx*sizeof(float));
	      if (*w)
		{
		  *work = malloc(lwork*sizeof(float));
		  if (!*work)
		    status = -1;
		}
	      else
		status = -1;
	    }
	  else
	    status = -1;
	}
      else
	status = -1;
    }
  return status;
}

static void eigsv_free(float *a, float *w, float *work)
{
  free(a);
  free(w);
  free(work);
}

/*
  Calculate eigenvalues and eigenvectors of an input 'imx' by 'imx'
  real symmetric matrix 'amx' using LAPACK function ssyev_().  The
  eigenvalues are returned in 'evl' in descending order, and the
  corresponding eigenvectors in columns of 'evx'.  Column (**evx)[i]
  corresponds to eigenvalue (*evl)[i]. In other words, upon return,
  eigenvector corresponding to largest eigenvalue will be in
  evx[0][0], evx[1][0], ... evx[imx-1][0]. Eigenvector corresponding
  to next largest eigenvalue will be in evx[0][1], evx[1][1],
  ... evx[imx-1][1], etc.

  Return 0 for success, -1 for malloc() failure, and set 'errc'. The
  errors are malloc() errors, LERR_ITMAX or EINVAL. LERR_ITMAX means
  the LAPACK function ssyev_() failed to converge.  EINVAL means bad
  input arguments: for example, 'imx' <= 0, or aml/evl NULL.

  Note: eigenvectors returned as columns for compatibility. Use
  eigsn() to get eigenvectors in rows.
*/
int eigsn_column(float **amx, int imx, float **evl, float ***evx, int *errc)
{
  int   i;
  int   j;
  int   idx;
  int   status;
  char  jobz;
  char  uplo;
  int   lwork;
  int   info;
  float *a;
  float *w;
  float *work;

  status = 0;
  if ((imx > 0) && amx)
    {
      jobz = 'V';
      uplo = 'U';
      lwork = 3*imx; /* TBD: optimize */
      status = eigsn_init(imx, lwork, evl, evx, &a, &w, &work);
      if (!status)
	{
	  idx = 0;
	  for (i = 0; i < imx; i++)
	    for (j = 0; j < imx; j++)
	      a[idx++] = amx[j][i];
	  ssyev_(&jobz, &uplo, &imx, a, &imx, w, work, &lwork, &info);
	  if (info)
	    {
	      if (errc)
		{
		  if (info > 0)
		    *errc = LERR_ITMAX;
		  else
		    *errc = EINVAL;
		}
	      status = -1;
	    }
	  else
	    {
	      if (evl)
		{
		  for (i = 0; i < imx; i++)
		    (*evl)[i] = w[i];
		  fvec_reverse(*evl, imx);
		}
	      idx = 0;
	      if (evx)
		{
		  for (i = 0; i < imx; i++)
		    for (j = 0; j < imx; j++)
		      (*evx)[j][i] = a[idx++];
		  for (i = 0; i < imx; i++)
		    fvec_reverse((*evx)[i], imx);
		}
	    }
	  eigsv_free(a, w, work);
	}
    }
  else
    {
      status = -1;
      if (errc)
	*errc = EINVAL;
    }
  return status;
}

/*
  Compute eigenvectors and eigenvalues of a real symmetric
  matrix. This is a row-based version of eigsn_column().

  TBD: this should actually be the real computing function;
  eigsn_column() should transpose the result. Right now, it is the
  other way round. Later, not critical. ljb, 02/13/2003.
*/
int eigsn(float **amx, int imx, float **evl, float ***evx, int *errc)
{
  int   status;
  float **tvx;

  status = eigsn_column(amx, imx, evl, evx, errc);
  if (!status && evx)
    {
      tvx = fmx_transpose(*evx, imx, imx);
      mx_free((void **) *evx, imx);
      if (tvx)
	*evx = tvx;
      else
	status = -1;
    }
  return status;
}

/*
  Allocate space for eigen().
*/
static int eigen_init(float **a, float **fmx, int imx, float **scale, float **wr,
		      float **wi, float **vr, float **rconde, float **rcondv, 
		      float ***evx)
{
  int   status;
  int   i;
  int   j;
  int   idx;
  float *ptr;

  status = 0;
  ptr = malloc(imx*imx*sizeof(float));
  if (ptr)
    {
      idx = 0;
      for (i = 0; i < imx; i++)
	for (j = 0; j < imx; j++)
	  {
	    ptr[idx] = fmx[j][i];
	    idx++;
	  }
      *a = ptr;
      *scale = malloc(imx*sizeof(float));
      if (*scale)
	{
	  *wr = malloc(imx*sizeof(float));
	  if (*wr)
	    {
	      *wi = malloc(imx*sizeof(float));
	      if (!*wi)
		status = -1;
	      else
		{
		  *vr = malloc(imx*imx*sizeof(float));
		  if (vr)
		    {
		      *rconde = malloc(imx*sizeof(float));
		      if (!*rconde)
			status = -1;
		      else
			{
			  *rcondv = malloc(imx*sizeof(float));
			  if (!*rcondv)
			    status = -1;
			  else if (evx)
			    {
			      *evx = fmx_alloc(imx, imx);
			      if (!(*evx))
				status = -1;
			    }
			}
		    }
		  else
		    status = -1;
		}
	    }
	}
      else
	status = -1;
    }
  else
    status = -1;
  return status;
}

/*
  Free space used by eigen().
*/
static void eigen_free(float *a, float *scale, float *wr, float *wi, float *vr,
		       float *rconde, float *rcondv)
{
  vx_free(a);
  vx_free(scale);
  vx_free(wr);
  vx_free(wi);
  vx_free(vr);
  vx_free(rconde);
  vx_free(rcondv);
}

/*
  Calculate eigenvalues and right eigenvectors of an input 'imx' by
  'imx' real matrix 'fmx' using LAPACK function sgeevx_().  The
  eigenvalues are returned in 'evl' in descending order, and the
  corresponding eigenvectors in rows of 'evx'.

  Return 0 for success, -1 for malloc() failure, and set 'errc'. The
  errors are malloc() errors, EINVAL, or LERR_ITMAX if the LAPACK
  function fails to converge.
*/
int eigen(float **fmx, int imx, float **evl, float ***evx, int *errc)
{
  int   status;
  int   i;
  int   j;
  int   idx;
  int   info;
  int   lwork;
  int   ilo;
  int   ihi;
  char  balanc;
  char  jobvl;
  char  jobvr;
  char  sense;
  float abnrm;
  int   *iwork;
  float *a;
  float *wr;
  float *wi;
  float *vl;
  float *vr;
  float *scale;
  float *rconde;
  float *rcondv;
  float *work;
  float wk[1];
  float **eptr;

  iwork = (int *) 0; /* not used in sgeevx_(), initialize to stop compiler warnings */
  vl = (float *) 0; /* likewise */
  status = eigen_init(&a, fmx, imx, &scale, &wr, &wi, &vr, &rconde, &rcondv, evx);
  if (!status)
    {
      balanc = 'N';
      jobvl = 'N';
      jobvr = 'V';
      sense = 'N';
      /*
	Get optimal workspace size.
       */
      lwork = -1;
      sgeevx_(&balanc, &jobvl, &jobvr, &sense, &imx, a, &imx, wr, wi, 
	      vl, &imx, vr, &imx, &ilo, &ihi, scale, &abnrm, rconde, rcondv, wk,
	      &lwork, iwork, &info);
      if (!info)
	{
	  lwork = wk[0];
	  work = malloc(lwork*sizeof(float));
	  sgeevx_(&balanc, &jobvl, &jobvr, &sense, &imx, a, &imx, wr, wi, 
		  vl, &imx, vr, &imx, &ilo, &ihi, scale, &abnrm, rconde, rcondv, work,
		  &lwork, iwork, &info);
	  if (!info)
	    {
	      if (evl)
		*evl = wr; /* TBD: handle complex eigenvalues */
	      /*
		Extract right eigenvectors into 'evx'.
	      */
	      if (evx)
		{
		  eptr = *evx;
		  idx = 0;
		  for (i = 0; i < imx; i++)
		    for (j = 0; j < imx; j++)
		      eptr[i][j] = vr[idx++];
		}
	      eigen_free(a, scale, wr, wi, vr, rconde, rcondv);
	    }
	  else
	    status = -1;
	  vx_free(work);
	}
    }
  return status;
}

static int fmx_inv_init(int imx, int lwork, float **a, int **ipiv, float **work)
{
  int status;

  status = 0;
  *a = malloc(imx*imx*sizeof(float));
  if (*a)
    {
      *ipiv = malloc(imx*sizeof(int));
      if (*ipiv)
	{
	  *work = malloc(lwork*sizeof(float));
	  if (!(*work))
	    status = -1;
	}
      else
	status = -1;
    }
  else
    status = -1;
  return status;
}

static void fmx_inv_free(float *work, int *ipiv)
{
  vx_free(work);
  vx_free(ipiv);
}

static void comp_det(float *a, int imx, int *ipiv, float *dsign, float *dexp)
{
  int i;
  int idx;
  int ok_flag;

  ok_flag = 1;
  for (i = 0; (i < imx) && (ok_flag == 1); i++)
    {
      if (ipiv[i] <= 0)
	ok_flag = 0;
    }
  /*
    TBD: for now, we only compute the determinant if the factorization
    of a is strictly diagonal.
  */
  if (ok_flag == 1)
    {
      *dsign = 1.0;
      idx = 0;
      for (i = 0; i < imx; i++)
	{
	  if (a[idx] < 0)
	    *dsign = *dsign*(-1.0);
	  idx += imx+1;
	}
      *dexp = 0.0;
      idx = 0;
      for (i = 0; i < imx; i++)
	{
	  *dexp += log(fabs(a[idx]));
	  idx += imx+1;
	}
    }
}

/*
  Invert real symmetric matrix 'fmx' using LAPACK functions ssytrf_()
  and ssytri_(). If 'dsign' and 'dexp' are not (float *) 0, return
  also the determinant: 'dsign' is the sign of the determinant
  (+/-1.0), and 'dexp' is natural logarithm of the absolute value, so
  the final determinant is dsign*exp(dexp).  The input matrix 'fmx' is
  not modified (the function allocates space for the inverse).

  Return NULL for failure, and set 'errc'. The errors are malloc()
  errors, EINVAL for bad arguments and LERR_SINGULAR for singular
  matrix.
*/
float **fmx_inv(float **fmx, int imx, float *dsign, float *dexp, int *errc)
{
  int   i;
  int   j;
  int   idx;
  int   status;
  char  uplo;
  int   info;
  int   lwork;
  int   *ipiv;
  float *a;
  float *work;
  float workspace[1];
  float **inv;

  if (errc)
    *errc = 0;
  inv = (float **) 0;
  uplo = 'U';
  lwork = -1;
  /*
    Get the optimal size for vector 'work'.
  */
  ssytrf_(&uplo, &imx, a, &imx, ipiv, workspace, &lwork, &info);
  lwork = workspace[0];
  /*
    Initialize workspace, based on the optimal size obtained above.
  */
  status = fmx_inv_init(imx, lwork, &a, &ipiv, &work);
  if (!status)
    {
      idx = 0;
      for (i = 0; i < imx; i++)
	for (j = 0; j < imx; j++)
	  a[idx++] = fmx[i][j];
      ssytrf_(&uplo, &imx, a, &imx, ipiv, work, &lwork, &info);
      if (dsign && dexp)
	comp_det(a, imx, ipiv, dsign, dexp);
      ssytri_(&uplo, &imx, a, &imx, ipiv, work, &info);
      fmx_inv_free(work, ipiv);
      if (!info)
	{
	  inv = fmx_alloc(imx, imx);
	  if (inv)
	    {
	      idx = 0;
	      for (i = 0; i < imx; i++)
		for (j = 0; j < imx; j++)
		  inv[i][j] = a[idx++];
	      for (i = 0; i < imx-1; i++)
		for (j = i+1; j < imx; j++)
		  inv[i][j] = inv[j][i];
	    }
	  else if (errc)
	    *errc = errno;
	}
      else if ((info < 0) && errc)
	*errc = EINVAL;
      else if ((info > 0) && errc)
	*errc = LERR_SINGULAR;
      vx_free(a);
    }
  return inv;
}

/*
  Calculate determinant of a real symmetric matrix 'fmx' with 'imx'
  rows using LAPACK functions...  'dsign' is sign of the determinant
  (+/-1.0), and 'dexp' is natural logarithm of the absolute value, so
  the final determinant is dsign*exp(dexp).

  Return 0 for success, -1 for failure, and set 'errc'. The errors are
  malloc() errors and EINVAL for bad arguments.
*/
int fmx_det(float **fmx, int imx, float *dsign, float *dexp, int *errc)
{
  int   i;
  int   j;
  int   idx;
  int   status;
  char  uplo;
  int   info;
  int   lwork;
  int   *ipiv;
  float *a;
  float *work;
  float **inv;

  *errc = 0;
  inv = (float **) 0;
  uplo = 'U';
  lwork = -1;
  work = malloc(sizeof(float));
  /*
    Get the optimal size for vector 'work'.
  */
  ssytrf_(&uplo, &imx, a, &imx, ipiv, work, &lwork, &info);
  lwork = work[0];
  free(work);
  /*
    Initialize workspace, based on the optimal size obtained above.
  */
  status = fmx_inv_init(imx, lwork, &a, &ipiv, &work);
  if (!status)
    {
      idx = 0;
      for (i = 0; i < imx; i++)
	for (j = 0; j < imx; j++)
	  a[idx++] = fmx[i][j];
      ssytrf_(&uplo, &imx, a, &imx, ipiv, work, &lwork, &info);
      if (dsign && dexp)
	comp_det(a, imx, ipiv, dsign, dexp);
      fmx_inv_free(work, ipiv);
    }
  status = 0;
  return status;
}

/*
  Internal matrix multiplication function.

  Multiply 'amatrix' with 'bmatrix', transpose the result and store it
  in 'fmx'. 'amatrix' is 'arows' by 'acolumns'. 'bmatrix' must be
  'acolumns' by 'bdim'. 

  If 'tflag' is 1, multiply 'amatrix' with transpose of 'bmatrix'. In
  this case 'bmatrix' must be 'bdim' by 'acolumns' (meaning that the
  transpose is 'acolumns' by 'bdim'). The result is again 'bdim' by
  'arows'.

  The product is 'arows' by 'bdim' if 'transpose' is 1, and 'bdim' by
  'arows' otherwise.

  The function assumes that the caller has allocated sufficient space
  for the matrix 'fmx'.

  On success, return 0. On failure, return -1. The errors are:
  'amatrix', 'bmatrix' or 'fmx' are NULL, or 'fmx' is found to be
  incompatible with the dimensions of the problem.
*/
static int fmx_mlta(float **fmx, float **amatrix, int arows, int acolumns, 
		    float **bmatrix, int bdim, int tflag, int transpose)
{
  int i;
  int j;
  int k;
  int status;
  double s;

  if (amatrix && bmatrix && fmx)
    {
      status = 0;
      if (transpose == 1)
	{
	  for (i = 0; (i < bdim) && !status; i++)
	    if (!fmx[i])
	      status = -1;
	}
      else
	{
	  for (i = 0; (i < arows) && !status; i++)
	    if (!fmx[i])
	      status = -1;
	}
      if (!status)
	for (i = 0; i < arows; i++)
	  for (j = 0; j < bdim; j++)
	    {
	      s = 0.0;
	      for (k = 0; k < acolumns; k++)
		if (tflag == 1)
		  s += amatrix[i][k]*bmatrix[j][k];
		else
		  s += amatrix[i][k]*bmatrix[k][j];
	      if (transpose == 1)
		fmx[j][i] = s;
	      else
		fmx[i][j] = s;
	    }
    }
  else
    status = -1;
  return status;
}

/*
  Multiply 'amatrix' with 'bmatrix'. 'amatrix' is 'arows' by
  'acolumns'. 'bmatrix' must be 'acolumns' by 'bdim'. The product is
  'arows' by 'bdim'.

  If 'tflag' is 1, multiply 'amatrix' with transpose of 'bmatrix'. In
  this case 'bmatrix' must be 'bdim' by 'acolumns' (meaning that the
  transpose is 'acolumns' by 'bdim'). The result is again 'arows' by
  'bdim'.

  The function allocates space for the product.

  In case of error, return (float **) 0 and set errno. The only
  possible errors are memory allocation errors.
*/
float **fmx_mult(float **amatrix, int arows, int acolumns, float **bmatrix, 
		 int bdim, int tflag)
{
  float  **fmx;

  fmx = (float **) 0;
  if (amatrix && bmatrix)
    {
      fmx = fmx_alloc(arows, bdim);
      if (fmx)
	fmx_multa(fmx, amatrix, arows, acolumns, bmatrix, bdim, tflag);
    }
  return fmx;
}

/*
  Multiply 'amatrix' with 'bmatrix' and store result in
  'fmx'. 'amatrix' is 'arows' by 'acolumns'. 'bmatrix' must be
  'acolumns' by 'bdim'. The product is 'arows' by 'bdim'.

  If 'tflag' is 1, multiply 'amatrix' with transpose of 'bmatrix'. In
  this case 'bmatrix' must be 'bdim' by 'acolumns' (meaning that the
  transpose is 'acolumns' by 'bdim'). The result is again 'arows' by
  'bdim'.

  The function assumes that the caller has allocated sufficient space
  in 'arows' by 'bdim' matrix 'fmx', for example by:

  fmx = fmx_alloc(arows, bdim);

  In case of success, return 0. If 'amatrix', 'bmatrix' or 'fmx' are
  NULL, or dimensions of 'fmx' are incompatible with dimensions of
  'amatrix', 'bmatrix', the function returns -1.
*/
int fmx_multa(float **fmx, float **amatrix, int arows, int acolumns, 
	      float **bmatrix, int bdim, int tflag)
{
  int status;

  status = fmx_mlta(fmx, amatrix, arows, acolumns, bmatrix, bdim, tflag, 0);
  return status;
}

/*
  Multiply 'amatrix' with 'bmatrix' and transpose the
  result. 

  'amatrix' is 'arows' by 'acolumns'. 'bmatrix' must be 'acolumns' by
  'bdim'. The product is 'bdim' by 'arows'.

  If 'tflag' is 1, multiply 'amatrix' with transpose of 'bmatrix'. In
  this case 'bmatrix' must be 'bdim' by 'acolumns' (meaning that the
  transpose is 'acolumns' by 'bdim'). The result is again 'bdim' by
  'arows'.

  The function allocates space for the product.

  In case of error, return (float **) 0 and set errno. The only
  possible errors are memory allocation errors.
*/
float **fmx_tmult(float **amatrix, int arows, int acolumns, float **bmatrix, 
		  int bdim, int tflag)
{
  float  **fmx;

  fmx = (float **) 0;
  if (amatrix && bmatrix)
    {
      fmx = fmx_alloc(bdim, arows);
      if (fmx)
	fmx_tmulta(fmx, amatrix, arows, acolumns, bmatrix, bdim, tflag);
    }
  return fmx;
}

/*
  Multiply 'amatrix' with 'bmatrix', transpose the result and store it
  in 'fmx'. 'amatrix' is 'arows' by 'acolumns'. 'bmatrix' must be
  'acolumns' by 'bdim'. The product is 'bdim' by 'arows'.

  If 'tflag' is 1, multiply 'amatrix' with transpose of 'bmatrix'. In
  this case 'bmatrix' must be 'bdim' by 'acolumns' (meaning that the
  transpose is 'acolumns' by 'bdim'). The result is again 'bdim' by
  'arows'.

  The function assumes that the caller has allocated sufficient space
  in 'bdim' by 'arows' matrix 'fmx', for example by:

  fmx = fmx_alloc(bdim, arows);

  In case of success, return 0. If 'amatrix', 'bmatrix' or 'fmx' are
  NULL, or dimensions of 'fmx' are incompatible with dimensions of
  'amatrix', 'bmatrix', the function returns -1.
*/
int fmx_tmulta(float **fmx, float **amatrix, int arows, int acolumns, 
	       float **bmatrix, int bdim, int tflag)
{
  int status;

  status = fmx_mlta(fmx, amatrix, arows, acolumns, bmatrix, bdim, tflag, 1);
  return status;
}

/*
  Return mean value of column 'col' in 'fmx'.
*/
float fmx_col_mean(float **fmx, int rows, int col)
{
  int    j;
  double s;
  float  xmean;

  xmean = 0.0;
  if (fmx && (rows > 0) && (col >= 0))
    {
      s = 0.0;
      for (j = 0; j < rows; j++)
	s += fmx[j][col];
      xmean = s/rows;
    }
  return xmean;
}

/*
  Return index of min. element in 'vec'. Return -1 in case of bad
  arguments.
*/
int fvec_argmin(float *fvec, int len)
{
  int   i;
  float fval;
  int   idx;
  
  idx = -1;
  if (fvec && (len > 0))
    {
      fval = fvec[0];
      idx = 0;
      for (i = 1; i < len; i++)
	if (fvec[i] < fval)
	  {
	    fval = fvec[i];
	    idx = i;
	  }
    }
  return idx;
}

/*
  Return index of max. element in 'vec'. Return -1 in case of bad
  arguments.
*/
int fvec_argmax(float *fvec, int len)
{
  int   i;
  float fval;
  int   idx;
  
  idx = -1;
  if (fvec && (len > 0))
    {
      fval = fvec[0];
      idx = 0;
      for (i = 1; i < len; i++)
	if (fvec[i] > fval)
	  {
	    fval = fvec[i];
	    idx = i;
	  }
    }
  return idx;
}

/*
  Return index, and optionally 'value', of max. element in 'vec'.
*/
int fvec_valmax(float *vec, int len, float *value)
{
  int   i;
  int   index = 0;
  float fmax;

  if (vec && (len > 0))
    {
      fmax = vec[0];
      for (i = 1; i < len; i++)
	if (vec[i] > fmax)
	  {
	    fmax = vec[i];
	    index = i;
	  }
      if (value)
	*value = fmax;
    }
  return index;
}

/*
  Find max. element in 'matrix'. The indexes are 0-based.
*/
void fmx_max(float **matrix, int rows, int columns, int *max_row, int *max_column,
	     float *value)
{
  int i;
  int idx;
  float fmax;

  if (matrix && (rows > 0) && (columns > 0) && value)
    {
      *value = matrix[0][0];
      *max_row = 0;
      *max_column = 0;
      for (i = 0; i < rows; i++)
	{
	  idx = fvec_valmax(matrix[i], columns, &fmax);
	  if (fmax > *value)
	    {
	      *value = fmax;
	      *max_row = i;
	      *max_column = idx;
	    }
	}
    }
}

/*
  Convert vector 'fvec' of length 'len' to 'rows' by 'columns' matrix.

  Return the matrix in case of success, otherwise return NULL and set
  errno. The errors are bad arguments and memory allocation errors.
*/
float **fmx_fvec(float *fvec, int len, int rows, int columns)
{
  int   i;
  int   status;
  int   offset;
  float **fmx;

  status = 0;
  fmx = (float **) 0;
  if (fvec && (rows > 0) && (columns > 0) && (len >= rows*columns))
    {
      fmx = calloc(rows, sizeof(float *));
      if (fmx)
	{
	  offset = 0;
	  for (i = 0; (i < rows) && !status; i++)
	    {
	      fmx[i] = fvec_clone(&fvec[offset], columns);
	      if (!fmx[i])
		{
		  status = -1;
		  fmx = (float **) mx_free((void **) fmx, rows);
		}
	      else
		offset += columns;
	    }
	}
    }
  else
    errno = EINVAL;
  return fmx;
}



syntax highlighted by Code2HTML, v. 0.9.1