/*
 *  R : A Computer Language for Statistical Data Analysis
 *  Copyright (C) 1995-2006	Robert Gentleman, Ross Ihaka and the
 *				R Development Core Team
 *  Copyright (C) 2003		The R Foundation
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  A copy of the GNU General Public License is available via WWW at
 *  http://www.gnu.org/copyleft/gpl.html.  You can also obtain it by
 *  writing to the Free Software Foundation, Inc., 51 Franklin Street
 *  Fifth Floor, Boston, MA 02110-1301  USA.
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <Defn.h>
#include <Rmath.h>


#define COV_SUM_UPDATE				\
		    sum += xm * ym;		\
		    if(cor) {			\
			xsd += xm * xm;		\
			ysd += ym * ym;		\
		    }

/* Note that "if (kendall)" and	 "if (cor)" are used inside a double for() loop;
   which makes the code better readable -- and is hopefully dealt with
   by a smartly optimizing compiler
*/

#define COV_PAIRWISE_BODY						\
	    xx = &x[i * n];						\
	    nobs = 0;							\
	    if(!kendall) {						\
	    	xmean = ymean = 0.;					\
		for (k = 0 ; k < n ; k++) {				\
		    if(!(ISNAN(xx[k]) || ISNAN(yy[k]))) {		\
			nobs ++;					\
			xmean += xx[k];					\
			ymean += yy[k];					\
		    }							\
		}							\
	    } else /*kendall*/						\
		for (k = 0 ; k < n ; k++)				\
		    if(!(ISNAN(xx[k]) || ISNAN(yy[k])))			\
			nobs ++;					\
									\
	    if (nobs >= 2) {						\
		xsd = ysd = sum = 0.;					\
		if(!kendall) {						\
		    xmean /= nobs;					\
		    ymean /= nobs;					\
		    n1 = nobs-1;					\
		}							\
		for(k=0; k < n; k++) {					\
		    if(!(ISNAN(xx[k]) || ISNAN(yy[k]))) {		\
			if(!kendall) {					\
			    xm = xx[k] - xmean;				\
			    ym = yy[k] - ymean;				\
									\
			    COV_SUM_UPDATE				\
			}						\
			else { /* Kendall's tau */			\
			    for(n1=0 ; n1 < k ; n1++)			\
				if(!(ISNAN(xx[n1]) || ISNAN(yy[n1]))) {	\
				    xm = sign(xx[k] - xx[n1]);		\
				    ym = sign(yy[k] - yy[n1]);		\
									\
				    COV_SUM_UPDATE			\
				}					\
			}						\
		    }							\
		}							\
		if (cor) {						\
		    if(xsd == 0. || ysd == 0.) {			\
			*sd_0 = TRUE;					\
			sum = NA_REAL;					\
		    }							\
		    else {						\
			if(!kendall) {					\
			    xsd /= n1;					\
			    ysd /= n1;					\
			    sum /= n1;					\
			}						\
			sum /= (sqrt(xsd) * sqrt(ysd));			\
			if(sum > 1.) sum = 1.;				\
		    }							\
		}							\
		else if(!kendall)					\
		    sum /= n1;						\
									\
		ans[i + j * ncx] = sum;					\
	    }								\
	    else							\
		ans[i + j * ncx] = NA_REAL;


static void cov_pairwise1(int n, int ncx, double *x,
			  double *ans, Rboolean *sd_0, Rboolean cor, 
			  Rboolean kendall)
{
    LDOUBLE sum, xmean =0., ymean =0., xsd, ysd, xm, ym;
    double *xx, *yy;
    int i, j, k, nobs, n1 = -1;	/* -Wall initializing */

    for (i = 0 ; i < ncx ; i++) {
	for (j = 0 ; j <= i ; j++) {
	    yy = &x[j * n];

	    COV_PAIRWISE_BODY

	    ans[j + i * ncx] = ans[i + j * ncx];
	}
    }
}

static void cov_pairwise2(int n, int ncx, int ncy, double *x, double *y,
			  double *ans, Rboolean *sd_0, Rboolean cor, 
			  Rboolean kendall)
{
    LDOUBLE sum, xmean =0., ymean =0., xsd, ysd, xm, ym;
    double *xx, *yy;
    int i, j, k, nobs, n1 = -1;	/* -Wall initializing */

    for (i = 0 ; i < ncx ; i++) {
	for (j = 0 ; j < ncy ; j++) {
	    yy = &y[j * n];

	    COV_PAIRWISE_BODY
	}
    }
}
#undef COV_PAIRWISE_BODY


/* ----- method = "complete" : ----- */

#define ANS(I,J)  ans[I + J * ncx]

#define COV_init(_ny_)				\
    LDOUBLE sum, tmp, xxm, yym;			\
    double *xx, *yy;				\
    int i, j, k, nobs, n1=-1;/* -Wall */	\
						\
    /* total number of complete observations */	\
    nobs = 0;					\
    for(k = 0 ; k < n ; k++) {			\
	if (ind[k] != 0) nobs++;		\
    }						\
    if (nobs <= 1) {/* too many missing */	\
	for (i = 0 ; i < ncx ; i++)		\
	    for (j = 0 ; j < _ny_ ; j++)	\
		ANS(i,j) = NA_REAL;		\
	return;					\
    }


/* This uses two passes for better accuracy */
#define MEAN(_X_)				\
    /* variable means */			\
    for (i = 0 ; i < nc##_X_ ; i++) {		\
	xx = &_X_[i * n];			\
	sum = 0.;				\
	for (k = 0 ; k < n ; k++)		\
	    if(ind[k] != 0)			\
		sum += xx[k];			\
        tmp = sum / nobs;			\
        if(R_FINITE((double)tmp)) {		\
 	    sum = 0.;				\
	    for (k = 0 ; k < n ; k++)		\
	        if(ind[k] != 0)			\
		    sum += (xx[k] - tmp);      	\
             tmp = tmp + sum / nobs;		\
	}					\
	_X_##m [i] = tmp;			\
    }


static void
cov_complete1(int n, int ncx, double *x, double *xm,
	      int *ind, double *ans, Rboolean *sd_0, Rboolean cor,
	      Rboolean kendall)
{
    COV_init(ncx);

    if(!kendall) {
	MEAN(x);/* -> xm[] */
	n1 = nobs - 1;
    }
    for (i = 0 ; i < ncx ; i++) {
	xx = &x[i * n];

	if(!kendall) {
	    xxm = xm[i];
	    for (j = 0 ; j <= i ; j++) {
		yy = &x[j * n];
		yym = xm[j];
		sum = 0.;
		for (k = 0 ; k < n ; k++)
		    if (ind[k] != 0)
			sum += (xx[k] - xxm) * (yy[k] - yym);
		ANS(j,i) = ANS(i,j) = sum / n1;
	    }
	}
	else { /* Kendall's tau */
	    for (j = 0 ; j <= i ; j++) {
		yy = &x[j * n];
		sum = 0.;
		for (k = 0 ; k < n ; k++)
		    if (ind[k] != 0)
			for (n1 = 0 ; n1 < n ; n1++)
			    if (ind[n1] != 0)
				sum += sign(xx[k] - xx[n1])
				    * sign(yy[k] - yy[n1]);
		ANS(j,i) = ANS(i,j) = sum;
	    }
	}
    }

    if (cor) {
	for (i = 0 ; i < ncx ; i++)
	    xm[i] = sqrt(ANS(i,i));
	for (i = 0 ; i < ncx ; i++) {
	    for (j = 0 ; j < i ; j++) {
		if (xm[i] == 0 || xm[j] == 0) {
		    *sd_0 = TRUE;
		    ANS(j,i) = ANS(i,j) = NA_REAL;
		}
		else {
		    sum = ANS(i,j) / (xm[i] * xm[j]);
		    if(sum > 1.) sum = 1.;
		    ANS(j,i) = ANS(i,j) = sum;
		}
	    }
	    ANS(i,i) = 1.0;
	}
    }
}

static void
cov_complete2(int n, int ncx, int ncy, double *x, double *y,
	      double *xm, double *ym, int *ind,
	      double *ans, Rboolean *sd_0, Rboolean cor, Rboolean kendall)
{
    COV_init(ncy);

    if(!kendall) {
	MEAN(x);/* -> xm[] */
	MEAN(y);/* -> ym[] */
	n1 = nobs - 1;
    }
    for (i = 0 ; i < ncx ; i++) {
	xx = &x[i * n];
	if(!kendall) {
	    xxm = xm[i];
	    for (j = 0 ; j < ncy ; j++) {
		yy = &y[j * n];
		yym = ym[j];
		sum = 0.;
		for (k = 0 ; k < n ; k++)
		    if (ind[k] != 0)
			sum += (xx[k] - xxm) * (yy[k] - yym);
		ANS(i,j) = sum / n1;
	    }
	}
	else { /* Kendall's tau */
	    for (j = 0 ; j < ncy ; j++) {
		yy = &y[j * n];
		sum = 0.;
		for (k = 0 ; k < n ; k++)
		    if (ind[k] != 0)
			for (n1 = 0 ; n1 < n ; n1++)
			    if (ind[n1] != 0)
				sum += sign(xx[k] - xx[n1])
				    * sign(yy[k] - yy[n1]);
		ANS(i,j) = sum;
	    }
	}
    }

    if (cor) {

#define COV_SDEV(_X_)							\
	for (i = 0 ; i < nc##_X_ ; i++) { /* Var(X[j]) */		\
	    xx = &_X_[i * n];						\
	    sum = 0.;							\
	    if(!kendall) {						\
		xxm = _X_##m [i];					\
		for (k = 0 ; k < n ; k++)				\
		    if (ind[k] != 0)					\
			sum += (xx[k] - xxm) * (xx[k] - xxm);		\
		sum /= n1;						\
	    }								\
	    else { /* Kendall's tau */					\
		for (k = 0 ; k < n ; k++)				\
		    if (ind[k] != 0)					\
			for (n1 = 0 ; n1 < n ; n1++)			\
			    if (ind[n1] != 0 &&	 xx[k] != xx[n1])	\
				sum ++; /* = sign(. - .)^2 */		\
	    }								\
	    _X_##m [i] = sqrt(sum);					\
	}

	COV_SDEV(x); /* -> xm[.] */
	COV_SDEV(y); /* -> ym[.] */

	for (i = 0 ; i < ncx ; i++)
	    for (j = 0 ; j < ncy ; j++)
		if (xm[i] == 0. || ym[j] == 0.) {
		    *sd_0 = TRUE;
		    ANS(i,j) = NA_REAL;
		}
		else {
		    ANS(i,j) /= (xm[i] * ym[j]);
		    if(ANS(i,j) > 1.) ANS(i,j) = 1.;
		}
    }/* cor */

}/* cov_complete2 */

#undef ANS
#undef COV_init
#undef MEAN
#undef COV_SDEV

/* This might look slightly inefficient, but it is designed to
 * optimise paging in virtual memory systems ...
 * (or at least that's my story, and I'm sticking to it.)
*/
#define NA_LOOP								\
	for (i = 0 ; i < n ; i++)					\
	    if (ISNAN(z[i])) {						\
		if (na_fail) error(_("missing observations in cov/cor"));\
		else ind[i] = 0;					\
	    }

#define COMPLETE_1				\
    double *z;					\
    int i, j;					\
    for (i = 0 ; i < n ; i++)			\
	ind[i] = 1;				\
    for (j = 0 ; j < ncx ; j++) {		\
	z = &x[j * n];				\
	NA_LOOP					\
    }

static void complete1(int n, int ncx, double *x, int *ind, Rboolean na_fail)
{
    COMPLETE_1
}

static void
complete2(int n, int ncx, int ncy, double *x, double *y, int *ind, Rboolean na_fail)
{
    COMPLETE_1

    for(j = 0 ; j < ncy ; j++) {
	z = &y[j * n];
	NA_LOOP
    }
}
#undef NA_LOOP
#undef COMPLETE_1

/* cov | cor( x, y, use = {1,		2,		3}
			"all.obs", "complete.obs", "pairwise.complete.obs",
		    kendall = TRUE/FALSE) */
SEXP attribute_hidden do_cov(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP x, y, ans, xm, ym, ind;
    Rboolean cor, kendall, pair, na_fail, sd_0;
    int ansmat, method, n, ncx, ncy;

    checkArity(op, args);

    /* compute correlations if PRIMVAL(op) == 0,
	       covariances  if PRIMVAL(op) != 0 */
    cor = PRIMVAL(op);

    /* Arg.1: x */
    if (isNull(CAR(args)) || !LENGTH(CAR(args))) error(_("'x' is empty"));
    x = SETCAR(args, coerceVector(CAR(args), REALSXP));
    if ((ansmat = isMatrix(x))) {
	n = nrows(x);
	ncx = ncols(x);
    }
    else {
	n = length(x);
	ncx = 1;
    }
    args = CDR(args);
    /* Arg.2: y */
    if (isNull(CAR(args))) {/* y = x  : var() */
	y = R_NilValue;
	ncy = ncx;
    }
    else {
	y = SETCAR(args, coerceVector(CAR(args), REALSXP));
	if (isMatrix(y)) {
	    if (nrows(y) != n)
		errorcall(call, _("incompatible dimensions"));
	    ncy = ncols(y);
	    ansmat = (1);
	}
	else {
	    if (length(y) != n)
		errorcall(call, _("incompatible dimensions"));
	    ncy = 1;
	}
    }
    args = CDR(args);
    /* Arg.3:  method */
    method = asInteger(CAR(args));

    args = CDR(args);
    /* Arg.4:  kendall */
    kendall = asLogical(CAR(args));

    /* "default: complete" (easier for -Wall) */
    na_fail = FALSE;
    pair = FALSE;
    switch(method) {
    case 1:		/* use all :  no NAs */
	na_fail = TRUE;
	break;
    case 2:		/* complete */
	break;
    case 3:		/* pairwise.complete */
	pair = TRUE;
	break;
    default:
	errorcall(call, _("invalid 'use' (computational method)"));
    }
    if (ansmat) PROTECT(ans = allocMatrix(REALSXP, ncx, ncy));
    else PROTECT(ans = allocVector(REALSXP, ncx * ncy));
    sd_0 = FALSE;
    if (isNull(y)) {
	if (!pair) { /* all | complete "var" */
	    PROTECT(xm = allocVector(REALSXP, ncx));
	    PROTECT(ind = allocVector(INTSXP, n));
	    complete1(n, ncx, REAL(x), INTEGER(ind), na_fail);
	    cov_complete1(n, ncx, REAL(x), REAL(xm),
			  INTEGER(ind), REAL(ans), &sd_0, cor, kendall);
	    UNPROTECT(2);
	}
	else {		/* pairwise "var" */
	    cov_pairwise1(n, ncx, REAL(x), REAL(ans), &sd_0, cor, kendall);
	}
    }
    else { /* Co[vr] (x, y) */
	if (!pair) { /* all | complete */
	    PROTECT(xm = allocVector(REALSXP, ncx));
	    PROTECT(ym = allocVector(REALSXP, ncy));
	    PROTECT(ind = allocVector(INTSXP, n));
	    complete2(n, ncx, ncy, REAL(x), REAL(y), INTEGER(ind), na_fail);
	    cov_complete2(n, ncx, ncy, REAL(x), REAL(y), REAL(xm), REAL(ym),
			  INTEGER(ind), REAL(ans), &sd_0, cor, kendall);
	    UNPROTECT(3);
	}
	else {		/* pairwise */
	    cov_pairwise2(n, ncx, ncy, REAL(x), REAL(y), REAL(ans),
			  &sd_0, cor, kendall);
	}
    }
    if (ansmat) {
	if (isNull(y)) {
	    x = getAttrib(x, R_DimNamesSymbol);
	    if (!isNull(x) && !isNull(VECTOR_ELT(x, 1))) {
		PROTECT(ind = allocVector(VECSXP, 2));
		SET_VECTOR_ELT(ind, 0, duplicate(VECTOR_ELT(x, 1)));
		SET_VECTOR_ELT(ind, 1, duplicate(VECTOR_ELT(x, 1)));
		setAttrib(ans, R_DimNamesSymbol, ind);
		UNPROTECT(1);
	    }
	}
	else {
	    x = getAttrib(x, R_DimNamesSymbol);
	    y = getAttrib(y, R_DimNamesSymbol);
	    if ((!isNull(x) && !isNull(VECTOR_ELT(x, 1))) ||
		(!isNull(y) && !isNull(VECTOR_ELT(y, 1)))) {
		PROTECT(ind = allocVector(VECSXP, 2));
		if (!isNull(x) && !isNull(VECTOR_ELT(x, 1)))
		    SET_VECTOR_ELT(ind, 0, duplicate(VECTOR_ELT(x, 1)));
		if (!isNull(y) && !isNull(VECTOR_ELT(y, 1)))
		    SET_VECTOR_ELT(ind, 1, duplicate(VECTOR_ELT(y, 1)));
		setAttrib(ans, R_DimNamesSymbol, ind);
		UNPROTECT(1);
	    }
	}
    }
    if(sd_0)/* only in cor() */
	warningcall(call, _("the standard deviation is zero"));
    UNPROTECT(1);
    return ans;
}


syntax highlighted by Code2HTML, v. 0.9.1