/*---------------------------------------------------------------------------*/ /* This file contains routines for performing multiple linear regression. File: RegAna.c Author: B. Douglas Ward Date: 02 September 1998 Mod: Restructured matrix calculations to improve execution speed. Date: 16 December 1998 Mod: Added routines for matrix calculation of general linear tests. Date: 02 July 1999 Mod: Additional statistical output (partial R^2 statistics). Date: 07 September 1999 Mod: Modifications for compatibility with 3dDeconvolve options for writing the fitted full model time series (-fitts) and the residual error time series (-errts) to 3d+time datasets. Date: 22 November 1999 Mod: Added function calc_sse_fit. Date: 21 April 2000 Mod: Additional output with -nodata option (norm.std.dev.'s for GLT linear constraints). Date: 11 August 2000 Mod: Modified use of vector_multiply() followed by vector_subtract() to use new function vector_multiply_subtract(), and to use new function vector_dotself() when possible -- RWCox. Date: 28 Dec 2001 Mod: Modification to calc_tcoef to calculate t-statistics for individual GLT linear constraints. Date: 29 January 2002 Mod: If FLOATIZE is defined, uses floats instead of doubles -- RWCox. Date: 03 Mar 2003 Mod: If use_psinv is 1, use matrix_psinv() instead of inversion. Date 19 Jul 2004 -- RWCox */ static int use_psinv = 1 ; /* 19 Jul 2004 */ /*---------------------------------------------------------------------------*/ #ifndef FLOATIZE # include "matrix.c" #endif void RA_error (char * message); /* prototype */ /*---------------------------------------------------------------------------*/ /** macro to test a malloc-ed pointer for validity **/ #define MTEST(ptr) \ if((ptr)==NULL) \ ( RA_error ("Cannot allocate memory") ) /*---------------------------------------------------------------------------*/ /* Calculate constant matrices to be used for all voxels. */ int calc_matrices ( matrix xdata, /* experimental design matrix */ int p, /* number of parameters */ int * plist, /* list of parameters */ matrix * x, /* extracted X matrix */ matrix * xtxinv, /* matrix: 1/(X'X) */ matrix * xtxinvxt /* matrix: (1/(X'X))X' */ ) { matrix xt, xtx; /* temporary matrix calculation results */ int ok; /* flag for successful matrix inversion */ ENTRY("calc_matrices") ; /*----- extract the independent variable matrix X -----*/ matrix_extract (xdata, p, plist, x); /*----- calculate various matrices which will be needed later -----*/ if( p <= 1 || !use_psinv ){ matrix_initialize (&xt); matrix_initialize (&xtx); matrix_transpose (*x, &xt); matrix_multiply (xt, *x, &xtx); ok = matrix_inverse_dsc (xtx, xtxinv); if (ok) matrix_multiply (*xtxinv, xt, xtxinvxt); else RA_error ("Regression setup: Improper X matrix (can't invert X'X) "); matrix_destroy (&xtx); matrix_destroy (&xt); } else { matrix_psinv (*x, xtxinv , xtxinvxt ); ok = 1 ; /* 19 Jul 2004 */ } RETURN (ok); } /*---------------------------------------------------------------------------*/ /* Calculate constant matrix to be used for general linear test (GLT). */ int calc_glt_matrix ( matrix xtxinv, /* matrix: 1/(X'X) */ matrix c, /* matrix representing GLT linear constraints */ matrix * a, /* constant matrix for use later */ matrix * cxtxinvct /* matrix: C(1/(X'X))C' for GLT */ ) { matrix ct, xtxinvct, t1, t2; /* temporary matrix calculation results */ int ok; /* flag for successful matrix inversion */ ENTRY("calc_glt_matrix") ; /*----- initialize matrices -----*/ matrix_initialize (&ct); matrix_initialize (&xtxinvct); matrix_initialize (&t1); matrix_initialize (&t2); /*----- calculate the constant matrix which will be needed later -----*/ matrix_transpose (c, &ct); /* [c]' */ matrix_multiply (xtxinv, ct, &xtxinvct); /* inv{[x]'[x]} [c]' */ matrix_multiply (c, xtxinvct, cxtxinvct); /* [c] inv{[x]'[x]} [c]' */ ok = matrix_inverse_dsc (*cxtxinvct, &t2); /* inv{ [c] inv{[x]'[x]} [c]' } */ if (ok) { matrix_multiply (xtxinvct, t2, &t1); matrix_multiply (t1, c, &t2); matrix_identity (xtxinv.rows, &t1); matrix_subtract (t1, t2, a); } else RA_error ("GLT setup: Improper C matrix (can't invert C[1/(X'X)]C') "); /*----- dispose of matrices -----*/ matrix_destroy (&ct); matrix_destroy (&xtxinvct); matrix_destroy (&t1); matrix_destroy (&t2); RETURN (ok); } /*---------------------------------------------------------------------------*/ /* Calculate the error sum of squares. */ float calc_sse ( matrix x, /* independent variable matrix */ vector b, /* vector of estimated regression parameters */ vector y /* vector of measured data */ ) { #if 0 vector yhat; /* product Xb */ #endif vector e; /* vector of residuals */ float sse; /* error sum of squares */ /*----- initialize vectors -----*/ #if 0 vector_initialize (&yhat); #endif vector_initialize (&e); /*----- calculate the error sum of squares -----*/ #if 0 vector_multiply (x, b, &yhat); vector_subtract (y, yhat, &e); sse = vector_dot (e, e); #else sse = vector_multiply_subtract( x , b , y , &e ) ; #endif /*----- dispose of vectors -----*/ vector_destroy (&e); #if 0 vector_destroy (&yhat); #endif /*----- return SSE -----*/ return (sse); } /*---------------------------------------------------------------------------*/ /* Calculate the error sum of squares and the residuals. */ float calc_resids ( matrix x, /* independent variable matrix */ vector b, /* vector of estimated regression parameters */ vector y, /* vector of measured data */ vector * e /* vector of residuals */ ) { #if 0 vector yhat; /* product Xb */ #endif float sse; /* error sum of squares */ /*----- initialize vectors -----*/ #if 0 vector_initialize (&yhat); #endif /*----- calculate the error sum of squares -----*/ #if 0 vector_multiply (x, b, &yhat); vector_subtract (y, yhat, e); sse = vector_dot (*e, *e); #else sse = vector_multiply_subtract( x , b , y , e ) ; #endif /*----- dispose of vectors -----*/ #if 0 vector_destroy (&yhat); #endif /*----- return SSE -----*/ return (sse); } /*---------------------------------------------------------------------------*/ /* Calculate the error sum of squares. Also, return the fitted time series, and residual errors time series. */ float calc_sse_fit ( matrix x, /* independent variable matrix */ vector b, /* vector of estimated regression parameters */ vector y, /* vector of measured data */ float * fitts, /* full model fitted time series */ float * errts /* full model residual error time series */ ) { vector yhat; /* product Xb */ vector e; /* vector of residuals */ float sse; /* error sum of squares */ int it; /* time point index */ /*----- initialize vectors -----*/ vector_initialize (&yhat); vector_initialize (&e); /*----- calculate the error sum of squares -----*/ vector_multiply (x, b, &yhat); vector_subtract (y, yhat, &e); sse = vector_dotself( e ); /*----- save the fitted time series and residual errors -----*/ for (it = 0; it < x.rows; it++) { fitts[it] = yhat.elts[it]; errts[it] = e.elts[it]; } /*----- dispose of vectors -----*/ vector_destroy (&e); vector_destroy (&yhat); /*----- return SSE -----*/ return (sse); } /*---------------------------------------------------------------------------*/ /* Calculate the pure error sum of squares. */ float calc_sspe ( vector y, /* vector of measured data */ int * levels, /* indices for repeat observations */ int * counts, /* number of observations at each level */ int c /* number of unique rows in ind. var. matrix */ ) { register int i, j; /* indices */ register float * sum = NULL; /* sum of observations at each level */ register float diff; /* difference between observation and average */ register float sspe; /* pure error sum of squares */ /*----- initialize sum -----*/ sum = (float *) malloc (sizeof(float) * c); MTEST (sum); for (j = 0; j < c; j++) sum[j] = 0.0; /*----- accumulate sum for each level -----*/ for (i = 0; i < y.dim; i++) { j = levels[i]; sum[j] += y.elts[i]; } /*----- calculate SSPE -----*/ sspe = 0.0; for (i = 0; i < y.dim; i++) { j = levels[i]; diff = y.elts[i] - (sum[j]/counts[j]); sspe += diff * diff; } free (sum); sum = NULL; /*----- return SSPE -----*/ return (sspe); } /*---------------------------------------------------------------------------*/ /* Calculate the F-statistic for lack of fit. */ float calc_flof ( int n, /* number of data points */ int p, /* number of parameters in the full model */ int c, /* number of unique rows in ind. var. matrix */ float sse, /* error sum of squares from full model */ float sspe /* error sum of squares due to pure error */ ) { const float EPSILON = 1.0e-5; /* protection against divide by zero */ float mspe; /* mean square error due to pure error */ float sslf; /* error sum of squares due to lack of fit */ float mslf; /* mean square error due to lack of fit */ float flof; /* F-statistic for lack of fit */ /*----- calculate mean sum of squares due to pure error -----*/ mspe = sspe / (n - c); /*----- calculate mean sum of squares due to lack of fit -----*/ sslf = sse - sspe; mslf = sslf / (c - p); /*----- calculate F-statistic for lack of fit -----*/ if (mspe < EPSILON) flof = 0.0; else flof = mslf / mspe; /*----- return F-statistic for lack of fit -----*/ return (flof); } /*---------------------------------------------------------------------------*/ /* Calculate the regression coefficients. */ void calc_coef ( matrix xtxinvxt, /* matrix: (1/(X'X))X' */ vector y, /* vector of measured data */ vector * coef /* vector of regression parameters */ ) { /*----- calculate regression coefficients -----*/ vector_multiply (xtxinvxt, y, coef); } /*---------------------------------------------------------------------------*/ /* Calculate least squares estimators under the reduced model. */ void calc_rcoef ( matrix a, /* constant matrix for least squares calculation */ vector coef, /* vector of regression parameters */ vector * rcoef /* reduced model regression coefficients */ ) { /*----- calculate reduced model regression coefficients -----*/ vector_multiply (a, coef, rcoef); } /*---------------------------------------------------------------------------*/ /* Calculate linear combinations of regression coefficients. */ void calc_lcoef ( matrix c, /* matrix representing GLT linear constraints */ vector coef, /* vector of regression parameters */ vector * lcoef /* linear combinations of regression parameters */ ) { /*----- calculate linear combinations -----*/ vector_multiply (c, coef, lcoef); } /*---------------------------------------------------------------------------*/ /* Calculate standard deviations and t-statistics for the regression coefficients. */ void calc_tcoef ( int n, /* number of data points */ int p, /* number of parameters in the full model */ float sse, /* error sum of squares */ matrix xtxinv, /* matrix: 1/(Xf'Xf) */ vector coef, /* vector of regression parameters */ vector * scoef, /* std. devs. for regression parameters */ vector * tcoef /* t-statistics for regression parameters */ ) { const float MAXT = 1000.0; /* maximum value for t-statistic */ const float EPSILON = 1.0e-5; /* protection against divide by zero */ int df; /* error degrees of freedom */ float mse; /* mean square error */ register int i; /* parameter index */ float stddev; /* standard deviation for parameter estimate */ float tstat; /* t-statistic for parameter estimate */ float num; /* numerator of t-statistic */ float var; /* variance for parameter estimate */ /*----- Create vectors -----*/ vector_create (p, scoef); vector_create (p, tcoef); /*----- Calculate mean square error -----*/ df = n - p; mse = sse / df; for (i = 0; i < xtxinv.rows; i++) { /*----- Calculate standard deviation for regression parameters -----*/ var = mse * xtxinv.elts[i][i]; if (var <= 0.0) stddev = 0.0; else stddev = sqrt (var); scoef->elts[i] = stddev; /*----- Calculate t-statistic for regression parameters -----*/ num = coef.elts[i]; if (num > MAXT*stddev) tstat = MAXT; else if (num < -MAXT*stddev) tstat = -MAXT; else if (stddev < EPSILON) tstat = 0.0; else tstat = num / stddev; /*----- Limit range of values for t-statistic -----*/ if (tstat < -MAXT) tstat = -MAXT; if (tstat > MAXT) tstat = MAXT; tcoef->elts[i] = tstat; } } /*---------------------------------------------------------------------------*/ /* Calculate the F-statistic for significance of the regression. */ float calc_freg ( int n, /* number of data points */ int p, /* number of parameters in the full model */ int q, /* number of parameters in the rdcd model */ float ssef, /* error sum of squares from full model */ float sser /* error sum of squares from reduced model */ ) { const float MAXF = 1000.0; /* maximum value for F-statistic */ const float EPSILON = 1.0e-5; /* protection against divide by zero */ float msef; /* mean square error for the full model */ float msreg; /* mean square due to the regression */ float freg; /* F-statistic for the full regression model */ /*----- Order of reduced model = order of full model ??? -----*/ if (p <= q) return (0.0); /*----- Calculate numerator and denominator of F-statistic -----*/ msreg = (sser - ssef) / (p - q); if (msreg < 0.0) msreg = 0.0; msef = ssef / (n - p); if (msef < 0.0) msef = 0.0; if (msreg > MAXF*msef) freg = MAXF; else if (msef < EPSILON) freg = 0.0; else freg = msreg / msef; /*----- Limit range of values for F-statistic -----*/ if (freg < 0.0) freg = 0.0; else if (freg > MAXF) freg = MAXF; /*----- Return F-statistic for significance of the regression -----*/ return (freg); } /*---------------------------------------------------------------------------*/ /* Calculate the coefficient of multiple determination R^2. */ float calc_rsqr ( float ssef, /* error sum of squares from full model */ float sser /* error sum of squares from reduced model */ ) { const float EPSILON = 1.0e-5; /* protection against divide by zero */ float rsqr; /* coeff. of multiple determination R^2 */ /*----- coefficient of multiple determination R^2 -----*/ if (sser < EPSILON) rsqr = 0.0; else rsqr = (sser - ssef) / sser; /*----- Limit range of values for R^2 -----*/ if (rsqr < 0.0) rsqr = 0.0; else if (rsqr > 1.0) rsqr = 1.0; /*----- Return coefficient of multiple determination R^2 -----*/ return (rsqr); } /*---------------------------------------------------------------------------*/