/*---------------------------------------------------------------------------*/
/*
  This file contains routines for performing multiple linear regression.

  File:    RegAna.c
  Author:  B. Douglas Ward
  Date:    02 September 1998

  Mod:     Restructured matrix calculations to improve execution speed.
  Date:    16 December 1998

  Mod:     Added routines for matrix calculation of general linear tests.
  Date:    02 July 1999

  Mod:     Additional statistical output (partial R^2 statistics).
  Date:    07 September 1999

  Mod:     Modifications for compatibility with 3dDeconvolve options for
           writing the fitted full model time series (-fitts) and the 
           residual error time series (-errts) to 3d+time datasets.
  Date:    22 November 1999

  Mod:     Added function calc_sse_fit.
  Date:    21 April 2000

  Mod:     Additional output with -nodata option (norm.std.dev.'s for
           GLT linear constraints).
  Date:    11 August 2000

  Mod:     Modified use of vector_multiply() followed by vector_subtract()
           to use new function vector_multiply_subtract(), and to use
           new function vector_dotself() when possible -- RWCox.
  Date:    28 Dec 2001

  Mod:     Modification to calc_tcoef to calculate t-statistics for individual
           GLT linear constraints.
  Date:    29 January 2002

  Mod:     If FLOATIZE is defined, uses floats instead of doubles -- RWCox.
  Date:    03 Mar 2003

  Mod:     If use_psinv is 1, use matrix_psinv() instead of inversion.
  Date     19 Jul 2004 -- RWCox

*/

static int use_psinv = 1 ;  /* 19 Jul 2004 */

/*---------------------------------------------------------------------------*/

#ifndef FLOATIZE
# include "matrix.c"
#endif

void RA_error (char * message);  /* prototype */


/*---------------------------------------------------------------------------*/

/** macro to test a malloc-ed pointer for validity **/

#define MTEST(ptr) \
if((ptr)==NULL) \
( RA_error ("Cannot allocate memory") )
     

/*---------------------------------------------------------------------------*/
/*
  Calculate constant matrices to be used for all voxels.
*/

int calc_matrices 
(
  matrix xdata,                 /* experimental design matrix      */
  int p,                        /* number of parameters            */
  int * plist,                  /* list of parameters              */
  matrix * x,                   /* extracted X matrix              */
  matrix * xtxinv,              /* matrix:  1/(X'X)                */
  matrix * xtxinvxt             /* matrix:  (1/(X'X))X'            */
)

{
  matrix xt, xtx;               /* temporary matrix calculation results */
  int ok;                       /* flag for successful matrix inversion */

ENTRY("calc_matrices") ;

  /*----- extract the independent variable matrix X -----*/
  matrix_extract (xdata, p, plist, x);


  /*----- calculate various matrices which will be needed later -----*/
  if( p <= 1 || !use_psinv ){
    matrix_initialize (&xt);
    matrix_initialize (&xtx);
    matrix_transpose (*x, &xt);
    matrix_multiply (xt, *x, &xtx);
    ok = matrix_inverse_dsc (xtx, xtxinv);
    if (ok)
      matrix_multiply (*xtxinv, xt, xtxinvxt);
    else
      RA_error ("Regression setup: Improper X matrix (can't invert X'X) ");
    matrix_destroy (&xtx);
    matrix_destroy (&xt);
  } else {
    matrix_psinv  (*x, xtxinv , xtxinvxt ); ok = 1 ;  /* 19 Jul 2004 */
  }

  RETURN (ok);
}


/*---------------------------------------------------------------------------*/
/*
  Calculate constant matrix to be used for general linear test (GLT).
*/

int calc_glt_matrix 
(
  matrix xtxinv,              /* matrix:  1/(X'X)  */
  matrix c,                   /* matrix representing GLT linear constraints */
  matrix * a,                 /* constant matrix for use later */
  matrix * cxtxinvct          /* matrix: C(1/(X'X))C' for GLT */
)

{
  matrix ct, xtxinvct, t1, t2;  /* temporary matrix calculation results */
  int ok;                       /* flag for successful matrix inversion */


ENTRY("calc_glt_matrix") ;

  /*----- initialize matrices -----*/
  matrix_initialize (&ct);
  matrix_initialize (&xtxinvct);
  matrix_initialize (&t1);
  matrix_initialize (&t2);


  /*----- calculate the constant matrix which will be needed later -----*/
  matrix_transpose (c, &ct);                 /* [c]' */
  matrix_multiply (xtxinv, ct, &xtxinvct);   /* inv{[x]'[x]} [c]' */
  matrix_multiply (c, xtxinvct, cxtxinvct);  /* [c] inv{[x]'[x]} [c]' */
  ok = matrix_inverse_dsc (*cxtxinvct, &t2); /* inv{ [c] inv{[x]'[x]} [c]' } */
  if (ok)
    {
      matrix_multiply (xtxinvct, t2, &t1);
      matrix_multiply (t1, c, &t2);
      matrix_identity (xtxinv.rows, &t1);
      matrix_subtract (t1, t2, a);
    }
  else
    RA_error ("GLT setup: Improper C matrix (can't invert C[1/(X'X)]C') ");


  /*----- dispose of matrices -----*/
  matrix_destroy (&ct);
  matrix_destroy (&xtxinvct);
  matrix_destroy (&t1);
  matrix_destroy (&t2);

  RETURN (ok);
}


/*---------------------------------------------------------------------------*/
/*
  Calculate the error sum of squares.
*/

float  calc_sse 
(
  matrix x,                  /* independent variable matrix  */
  vector b,                  /* vector of estimated regression parameters */
  vector y                   /* vector of measured data */
)

{
#if 0
  vector yhat;               /* product Xb */
#endif
  vector e;                  /* vector of residuals */
  float sse;                 /* error sum of squares */


  /*----- initialize vectors -----*/
#if 0
  vector_initialize (&yhat);
#endif
  vector_initialize (&e);


  /*----- calculate the error sum of squares -----*/
#if 0
  vector_multiply (x, b, &yhat);
  vector_subtract (y, yhat, &e);
  sse = vector_dot (e, e);
#else
  sse = vector_multiply_subtract( x , b , y , &e ) ;
#endif


  /*----- dispose of vectors -----*/
  vector_destroy (&e);
#if 0
  vector_destroy (&yhat);
#endif


  /*----- return SSE -----*/
  return (sse);
 
}


/*---------------------------------------------------------------------------*/
/*
  Calculate the error sum of squares and the residuals.
*/

float  calc_resids
(
  matrix x,                  /* independent variable matrix  */
  vector b,                  /* vector of estimated regression parameters */
  vector y,                  /* vector of measured data */
  vector * e                 /* vector of residuals */
)

{
#if 0
  vector yhat;               /* product Xb */
#endif
  float sse;                 /* error sum of squares */


  /*----- initialize vectors -----*/
#if 0
  vector_initialize (&yhat);
#endif


  /*----- calculate the error sum of squares -----*/
#if 0
  vector_multiply (x, b, &yhat);
  vector_subtract (y, yhat, e);
  sse = vector_dot (*e, *e);
#else
  sse = vector_multiply_subtract( x , b , y , e ) ;
#endif


  /*----- dispose of vectors -----*/
#if 0
  vector_destroy (&yhat);
#endif


  /*----- return SSE -----*/
  return (sse);
 
}


/*---------------------------------------------------------------------------*/
/*
  Calculate the error sum of squares.  Also, return the fitted time series, 
  and residual errors time series.
*/

float  calc_sse_fit
(
  matrix x,                  /* independent variable matrix  */
  vector b,                  /* vector of estimated regression parameters */
  vector y,                  /* vector of measured data */
  float * fitts,             /* full model fitted time series */
  float * errts              /* full model residual error time series */
)

{
  vector yhat;               /* product Xb */
  vector e;                  /* vector of residuals */
  float sse;                 /* error sum of squares */
  int it;                    /* time point index */


  /*----- initialize vectors -----*/
  vector_initialize (&yhat);
  vector_initialize (&e);


  /*----- calculate the error sum of squares -----*/
  vector_multiply (x, b, &yhat);
  vector_subtract (y, yhat, &e);
  sse = vector_dotself( e );

  /*----- save the fitted time series and residual errors -----*/
  for (it = 0;  it < x.rows;  it++)
    {
      fitts[it] = yhat.elts[it];
      errts[it] = e.elts[it];
    }


  /*----- dispose of vectors -----*/
  vector_destroy (&e);
  vector_destroy (&yhat);


  /*----- return SSE -----*/
  return (sse);
 
}


/*---------------------------------------------------------------------------*/
/*
  Calculate the pure error sum of squares.
*/

float  calc_sspe 
(
  vector y,                  /* vector of measured data */
  int * levels,              /* indices for repeat observations */
  int * counts,              /* number of observations at each level */
  int c                      /* number of unique rows in ind. var. matrix */
)

{
  register int i, j;                  /* indices */
  register float * sum = NULL;        /* sum of observations at each level */
  register float diff;                /* difference between observation and average */
  register float sspe;                /* pure error sum of squares */


  /*----- initialize sum -----*/
  sum = (float *) malloc (sizeof(float) * c);
  MTEST (sum);
  
  for (j = 0;  j < c;  j++)
    sum[j] = 0.0;


  /*----- accumulate sum for each level -----*/
  for (i = 0;  i < y.dim;  i++)
    {
      j = levels[i];
      sum[j] += y.elts[i];
    }


  /*----- calculate SSPE -----*/
  sspe = 0.0;
  for (i = 0;  i < y.dim;  i++)
    {
      j = levels[i];
      diff = y.elts[i] - (sum[j]/counts[j]);
      sspe += diff * diff;
    }

  
  free (sum);   sum = NULL;


  /*----- return SSPE -----*/
  return (sspe);
 
}


/*---------------------------------------------------------------------------*/
/*
  Calculate the F-statistic for lack of fit.
*/

float calc_flof
(
  int n,                      /* number of data points */
  int p,                      /* number of parameters in the full model */
  int c,                      /* number of unique rows in ind. var. matrix */
  float sse,                  /* error sum of squares from full model */
  float sspe                  /* error sum of squares due to pure error */
)

{
  const float EPSILON = 1.0e-5;      /* protection against divide by zero */
  float mspe;                 /* mean square error due to pure error */
  float sslf;                 /* error sum of squares due to lack of fit */
  float mslf;                 /* mean square error due to lack of fit */
  float flof;                 /* F-statistic for lack of fit */


  /*----- calculate mean sum of squares due to pure error -----*/
  mspe = sspe / (n - c);


  /*----- calculate mean sum of squares due to lack of fit -----*/
  sslf = sse - sspe;
  mslf = sslf / (c - p);


  /*----- calculate F-statistic for lack of fit -----*/
  if (mspe < EPSILON)
    flof = 0.0;
  else
    flof = mslf / mspe;


  /*----- return F-statistic for lack of fit -----*/
  return (flof);

}


/*---------------------------------------------------------------------------*/
/*
  Calculate the regression coefficients.
*/

void calc_coef 
(
  matrix xtxinvxt,            /* matrix:  (1/(X'X))X'   */
  vector y,                   /* vector of measured data   */
  vector * coef               /* vector of regression parameters */
)

{

  /*----- calculate regression coefficients -----*/
  vector_multiply (xtxinvxt, y, coef);

}


/*---------------------------------------------------------------------------*/
/*
  Calculate least squares estimators under the reduced model.
*/

void calc_rcoef 
(
  matrix a,            /* constant matrix for least squares calculation  */
  vector coef,         /* vector of regression parameters */
  vector * rcoef       /* reduced model regression coefficients */
)

{

  /*----- calculate reduced model regression coefficients -----*/
  vector_multiply (a, coef, rcoef);

}


/*---------------------------------------------------------------------------*/
/*
  Calculate linear combinations of regression coefficients.
*/

void calc_lcoef 
(
  matrix c,            /* matrix representing GLT linear constraints  */
  vector coef,         /* vector of regression parameters */
  vector * lcoef       /* linear combinations of regression parameters */
)

{

  /*----- calculate linear combinations -----*/
  vector_multiply (c, coef, lcoef);

}


/*---------------------------------------------------------------------------*/
/*
  Calculate standard deviations and t-statistics for the regression 
  coefficients.
*/

void calc_tcoef 
(
  int n,                      /* number of data points */
  int p,                      /* number of parameters in the full model */
  float sse,                  /* error sum of squares */
  matrix xtxinv,              /* matrix:  1/(Xf'Xf)        */
  vector coef,                /* vector of regression parameters */
  vector * scoef,             /* std. devs. for regression parameters */
  vector * tcoef              /* t-statistics for regression parameters */
)

{
  const float MAXT = 1000.0;         /* maximum value for t-statistic */
  const float EPSILON = 1.0e-5;      /* protection against divide by zero */
  int df;                     /* error degrees of freedom */
  float mse;                  /* mean square error */
  register int i;             /* parameter index */
  float stddev;               /* standard deviation for parameter estimate */
  float tstat;                /* t-statistic for parameter estimate */
  float num;                  /* numerator of t-statistic */
  float var;                  /* variance for parameter estimate */


  /*----- Create vectors -----*/
  vector_create (p, scoef);
  vector_create (p, tcoef);


  /*----- Calculate mean square error -----*/
  df = n - p;
  mse = sse / df;


  for (i = 0;  i < xtxinv.rows;  i++)
    {
      /*----- Calculate standard deviation for regression parameters -----*/
      var = mse * xtxinv.elts[i][i];
      if (var <= 0.0) stddev = 0.0;
      else            stddev = sqrt (var);
      scoef->elts[i] = stddev;


      /*----- Calculate t-statistic for regression parameters -----*/
      num = coef.elts[i];
           if (num >  MAXT*stddev) tstat = MAXT;
      else if (num < -MAXT*stddev) tstat = -MAXT;
	   else if (stddev < EPSILON)   tstat = 0.0;
	   else                         tstat = num / stddev;


      /*----- Limit range of values for t-statistic -----*/
      if (tstat < -MAXT)  tstat = -MAXT;
      if (tstat >  MAXT)  tstat = MAXT;
      
      tcoef->elts[i] = tstat;
    }
}


/*---------------------------------------------------------------------------*/
/*
  Calculate the F-statistic for significance of the regression.
*/

float calc_freg
(
  int n,                      /* number of data points */
  int p,                      /* number of parameters in the full model */
  int q,                      /* number of parameters in the rdcd model */
  float ssef,                 /* error sum of squares from full model */
  float sser                  /* error sum of squares from reduced model */
)

{
  const float MAXF = 1000.0;         /* maximum value for F-statistic */
  const float EPSILON = 1.0e-5;      /* protection against divide by zero */
  float msef;                 /* mean square error for the full model */
  float msreg;                /* mean square due to the regression */
  float freg;                 /* F-statistic for the full regression model */


  /*----- Order of reduced model = order of full model ??? -----*/
  if (p <= q)   return (0.0);


  /*----- Calculate numerator and denominator of F-statistic -----*/
  msreg = (sser - ssef) / (p - q);    if (msreg < 0.0)  msreg = 0.0;
  msef   = ssef / (n - p);            if (msef  < 0.0)  msef  = 0.0;

  if (msreg > MAXF*msef)  freg = MAXF;
  else 
    if (msef < EPSILON)
      freg = 0.0;
    else
      freg = msreg / msef;


  /*----- Limit range of values for F-statistic -----*/
       if (freg < 0.0)   freg = 0.0;
  else if (freg > MAXF)  freg = MAXF;


  /*----- Return F-statistic for significance of the regression -----*/
  return (freg);

}


/*---------------------------------------------------------------------------*/
/*
  Calculate the coefficient of multiple determination R^2.
*/

float calc_rsqr 
(
  float ssef,                 /* error sum of squares from full model */
  float sser                  /* error sum of squares from reduced model */
)

{
  const float EPSILON = 1.0e-5;     /* protection against divide by zero */
  float rsqr;                       /* coeff. of multiple determination R^2  */


  /*----- coefficient of multiple determination R^2 -----*/
  if (sser < EPSILON)
    rsqr = 0.0;
  else
    rsqr = (sser - ssef) / sser;


  /*----- Limit range of values for R^2 -----*/
       if (rsqr < 0.0)   rsqr = 0.0;
  else if (rsqr > 1.0)   rsqr = 1.0;


  /*----- Return coefficient of multiple determination R^2 -----*/
  return (rsqr);
}


/*---------------------------------------------------------------------------*/