/***************************************************************************** Major portions of this software are copyrighted by the Medical College of Wisconsin, 1994-2001, and are released under the Gnu General Public License, Version 2. See the file README.Copyright for details. ******************************************************************************/ /* This program performs the nonparametric Wilcoxon-Mann-Whitney two-sample test for determining whether the observations come from the same population. The output consists of an AFNI 'fizt' dataset; the first sub-brick contains an estimate of the treatment effect, the second sub-brick contains the normalized Wilcoxon rank-sum statistic. File: 3dMannWhitney.c Author: B. Douglas Ward Date: 23 July 1997 Mod: Added changes for incorporating History notes. Date: 08 September 1999 Mod: Replaced dataset input code with calls to THD_open_dataset, to allow operator selection of individual sub-bricks for input. Date: 03 December 1999 Mod: Added call to AFNI_logger. Date: 15 August 2001 Mod: Modified routine write_afni_fizt of NPstats.c so that all output subbricks will now have the scaled short integer format. Date: 14 March 2002 Mod: Set MAX_NAME_LENGTH equal to THD_MAX_NAME. Date: 02 December 2002 */ /*---------------------------------------------------------------------------*/ #define PROGRAM_NAME "3dMannWhitney" /* name of this program */ #define PROGRAM_AUTHOR "B. Douglas Ward" /* program author */ #define PROGRAM_INITIAL "23 July 1997" /* date of initial program release */ #define PROGRAM_LATEST "02 Dec 2002" /* date of latest program revision */ /*---------------------------------------------------------------------------*/ #include #include #include "mrilib.h" #define MAX_OBSERVATIONS 100 /* max. number of observations per cell */ #define MAX_NAME_LENGTH THD_MAX_NAME /* max. string length for file names */ #define MEGA 1048576 /* one megabyte */ /*---------------------------------------------------------------------------*/ typedef struct NP_options { int datum; /* data type for "intensity" data subbrick */ char session[MAX_NAME_LENGTH]; /* name of output directory */ int nvoxel; /* number of voxel for special output */ int m; /* number of X observations */ int n; /* number of Y observations */ char *** xname; /* names of the input data files */ char * first_dataset; /* name of the first data set */ int nx, ny, nz; /* data set dimensions */ int nxyz; /* number of voxels per image */ int workmem; /* working memory */ char * outfile; /* name of output file */ } NP_options; #include "NPstats.c" /*---------------------------------------------------------------------------*/ /* Routine to display 3dMannWhitney help menu. */ void display_help_menu() { printf ( "This program performs nonparametric Mann-Whitney two-sample test. \n\n" "Usage: \n" "3dMannWhitney \n" "-dset 1 filename data set for X observations \n" " . . . . . . \n" "-dset 1 filename data set for X observations \n" "-dset 2 filename data set for Y observations \n" " . . . . . . \n" "-dset 2 filename data set for Y observations \n" " \n" "[-workmem mega] number of megabytes of RAM to use \n" " for statistical workspace \n" "[-voxel num] screen output for voxel # num \n" "-out prefixname estimated population delta and \n" " Wilcoxon-Mann-Whitney statistics \n" " written to file prefixname \n" "\n"); printf ( "\n" "N.B.: For this program, the user must specify 1 and only 1 sub-brick \n" " with each -dset command. That is, if an input dataset contains \n" " more than 1 sub-brick, a sub-brick selector must be used, e.g.: \n" " -dset 2 'fred+orig[3]' \n" ); printf("\n" MASTER_SHORTHELP_STRING ) ; exit(0); } /*---------------------------------------------------------------------------*/ /* Routine to initialize input options. */ void initialize_options (NP_options * option_data) { int i; /* index */ option_data->datum = ILLEGAL_TYPE; strcpy (option_data->session, "./"); option_data->nvoxel = -1; option_data->m = 0; option_data->n = 0; option_data->workmem = 12; /*----- allocate memory for storing data file names -----*/ option_data->xname = (char ***) malloc (sizeof(char **) * 2); for (i = 0; i < 2; i++) option_data->xname[i] = (char **) malloc (sizeof(char *) * MAX_OBSERVATIONS); option_data->first_dataset = NULL; option_data->nx = 0; option_data->ny = 0; option_data->nz = 0; option_data->nxyz = 0; option_data->outfile = NULL; } /*---------------------------------------------------------------------------*/ /* Routine to get user specified Mann-Whitney options. */ void get_options (int argc, char ** argv, NP_options * option_data) { int nopt = 1; /* input option argument counter */ int ival; /* integer input */ int nijk; /* count of data files */ float fval; /* float input */ THD_3dim_dataset * dset=NULL; /* test whether data set exists */ char message[MAX_NAME_LENGTH]; /* error message */ /*----- does user request help menu? -----*/ if (argc < 2 || strncmp(argv[1], "-help", 5) == 0) display_help_menu(); /*----- add to program log -----*/ AFNI_logger (PROGRAM_NAME,argc,argv); /*----- initialize the input options -----*/ initialize_options (option_data); /*----- main loop over input options -----*/ while (nopt < argc) { /*----- -datum type -----*/ if( strncmp(argv[nopt],"-datum",6) == 0 ){ if( ++nopt >= argc ) NP_error("need an argument after -datum!") ; if( strcmp(argv[nopt],"short") == 0 ){ option_data->datum = MRI_short ; } else if( strcmp(argv[nopt],"float") == 0 ){ option_data->datum = MRI_float ; } else { char buf[256] ; sprintf(buf,"-datum of type '%s' is not supported in 3dMannWhitney.", argv[nopt] ) ; NP_error(buf) ; } nopt++ ; continue ; /* go to next arg */ } /*----- -session dirname -----*/ if( strncmp(argv[nopt],"-session",6) == 0 ){ nopt++ ; if( nopt >= argc ) NP_error("need argument after -session!") ; strcpy(option_data->session , argv[nopt++]) ; continue ; } /*----- -voxel num -----*/ if (strncmp(argv[nopt], "-voxel", 6) == 0) { nopt++; if (nopt >= argc) NP_error ("need argument after -voxel "); sscanf (argv[nopt], "%d", &ival); if (ival <= 0) NP_error ("illegal argument after -voxel "); option_data->nvoxel = ival; nopt++; continue; } /*----- -workmem megabytes -----*/ if( strncmp(argv[nopt],"-workmem",6) == 0 ){ nopt++ ; if( nopt >= argc ) NP_error ("need argument after -workmem!") ; sscanf (argv[nopt], "%d", &ival); if( ival <= 0 ) NP_error ("illegal argument after -workmem!") ; option_data->workmem = ival ; nopt++ ; continue ; } /*----- -dset level filename -----*/ if (strncmp(argv[nopt], "-dset", 5) == 0) { nopt++; if (nopt+1 >= argc) NP_error ("need 2 arguments after -dset "); sscanf (argv[nopt], "%d", &ival); if ((ival <= 0) || (ival > 2)) NP_error ("illegal argument after -dset "); if (ival == 1) { option_data->m += 1; nijk = option_data->m; } else { option_data->n += 1; nijk = option_data->n; } if (nijk > MAX_OBSERVATIONS) NP_error ("too many data files"); /*--- check whether input files exist ---*/ nopt++; dset = THD_open_dataset( argv[nopt] ) ; CHECK_OPEN_ERROR(dset,argv[nopt]) ; /*--- check number of selected sub-bricks ---*/ if (DSET_NVALS(dset) != 1) { sprintf(message,"Must specify exactly 1 sub-brick for file %s\n", argv[nopt]); NP_error (message); } THD_delete_3dim_dataset( dset , False ) ; dset = NULL ; option_data->xname[ival-1][nijk-1] = malloc (sizeof(char) * MAX_NAME_LENGTH); strcpy (option_data->xname[ival-1][nijk-1], argv[nopt]); nopt++; continue; } /*----- -out filename -----*/ if (strncmp(argv[nopt], "-out", 4) == 0) { nopt++; if (nopt >= argc) NP_error ("need argument after -out "); option_data->outfile = malloc (sizeof(char) * MAX_NAME_LENGTH); strcpy (option_data->outfile, argv[nopt]); nopt++; continue; } /*----- unknown command -----*/ NP_error ("unrecognized command line option "); } } /*---------------------------------------------------------------------------*/ /* Routine to check for valid inputs. */ void check_for_valid_inputs (NP_options * option_data) { if (option_data->m < 1) NP_error ("too few data sets for X-observations "); if (option_data->n < 1) NP_error ("too few data sets for Y-observations "); if (option_data->nvoxel > option_data->nxyz) NP_error ("argument of -voxel is too large"); } /*---------------------------------------------------------------------------*/ /* Routine to perform all Mann-Whitney initialization. */ void initialize ( int argc, /* number of input arguments */ char ** argv, /* array of input arguments */ NP_options ** option_data, /* user input options */ float ** delta, /* estimated shift parameter */ float **zvar /* normalized Mann-Whitney statistic */ ) { /*----- allocate memory space for input data -----*/ *option_data = (NP_options *) malloc(sizeof(NP_options)); if (*option_data == NULL) NP_error ("memory allocation error"); /*----- get command line inputs -----*/ get_options(argc, argv, *option_data); /*----- use first data set to get data set dimensions -----*/ (*option_data)->first_dataset = (*option_data)->xname[0][0]; get_dimensions (*option_data); printf ("Data set dimensions: nx = %d ny = %d nz = %d nxyz = %d \n", (*option_data)->nx, (*option_data)->ny, (*option_data)->nz, (*option_data)->nxyz); /*----- check for valid inputs -----*/ check_for_valid_inputs (*option_data); /*----- check whether output files already exist -----*/ if( THD_deathcon() ) check_one_output_file (*option_data, (*option_data)->outfile); /*----- allocate memory -----*/ *delta = (float *) malloc(sizeof(float) * (*option_data)->nxyz); if (*delta == NULL) NP_error ("memory allocation error"); *zvar = (float *) malloc(sizeof(float) * (*option_data)->nxyz); if (*zvar == NULL) NP_error ("memory allocation error"); } /*---------------------------------------------------------------------------*/ /* Calculate the normalized Wilcoxon-Mann-Whitney statistic. */ void calc_stat ( int nvox, /* flag for voxel output */ int m, /* number of control subjects */ int n, /* number of treatment subjects */ float * xarray, /* array of control data (x data) */ float * yarray, /* array of treatment data (y data) */ float * zvar /* normalized Mann-Whitney statistic */ ) { const float EPSILON = 1.0e-10; /* minimum variance limit */ int i, j; /* array indices */ node * head = NULL; /* points to head of list */ node * ptr = NULL; /* points to current position in list */ int NN; /* total number of sample points */ float wy; /* rank sum statistic */ float ewy; /* expected value of wy */ float varwy; /* variance of wy */ int d; /* count of number of ties */ float corr; /* correction to variance to account for ties */ float rank; /* rank of data point */ /*----- enter and sort x-array -----*/ for (i = 0; i < m; i++) node_addvalue (&head, xarray[i]); /*----- enter and sort y-array -----*/ for (j = 0; j < n; j++) node_addvalue (&head, yarray[j]); /*----- if display voxel, write the ranks of the input data -----*/ if (nvox > 0) { printf ("\n"); printf ("X ranks: \n"); for (i = 0; i < m; i++) { rank = node_get_rank (head, xarray[i]); printf (" %6.1f", rank); if (((i+1) % 10 == 0) && (i < m-1)) printf ("\n"); } printf ("\n\n"); printf ("Y ranks: \n"); for (i = 0; i < n; i++) { rank = node_get_rank (head, yarray[i]); printf (" %6.1f", rank); if (((i+1) % 10 == 0) && (i < n-1)) printf ("\n"); } printf ("\n\n"); } /*----- calculate rank sum of y-values -----*/ wy = 0.0; for (j = 0; j < n; j++) wy += node_get_rank (head, yarray[j]); if (nvox > 0) printf ("Wy = %f \n", wy); /*----- calculate expected value of rank sum statistic -----*/ NN = m + n; ewy = 0.5 * n * (NN+1); if (nvox > 0) printf ("E(Wy) = %f \n", ewy); /*----- calculate variance of rank sum statisitc -----*/ varwy = m * n * (NN+1) / 12.0; /*----- correction to variance due to ties in data -----*/ corr = 0.0; ptr = head; while (ptr != NULL) { d = ptr->d; corr += d*d*d - d; ptr = ptr->next; } corr *= m * n / (12.0 * NN * (NN - 1)); varwy -= corr; if (nvox > 0) printf ("Var(Wy) = %f \n", varwy); /*----- calculate normalized Wilcoxon-Mann-Whitney rank-sum statistic -----*/ if (varwy < EPSILON) *zvar = 0.0; else *zvar = (wy - ewy) / sqrt(varwy); if (nvox > 0) printf ("Z = %f \n", *zvar); /*----- deallocate memory -----*/ list_delete (&head); } /*---------------------------------------------------------------------------*/ /* Calculate the estimated shift parameter, using the median of the Walsh differences. */ void calc_shift ( int nvox, /* flag for voxel output */ int m, /* number of control subjects */ int n, /* number of treatment subjects */ float * xarray, /* array of control data (x data) */ float * yarray, /* array of treatment data (y data) */ float * delta_hat /* median of differences */ ) { int i, j; /* array indices */ int mn; /* number of Walsh differences */ node * head = NULL; /* points to head of list */ int count; /* list print counter */ /*----- enter and sort array of differences -----*/ for (i = 0; i < m; i++) for (j = 0; j < n; j++) node_addvalue (&head, yarray[j] - xarray[i]); /*----- if output requested, write the array of ordered differences -----*/ if (nvox > 0) { printf ("\n"); printf ("Ordered differences: \n"); count = 0; list_print (head, &count); printf ("\n"); } /*----- find median of differences -----*/ mn = m * n; *delta_hat = node_get_median (head, mn); if (nvox > 0) { printf ("\n"); printf ("Delta hat = %f \n\n", *delta_hat); } /*----- deallocate memory -----*/ list_delete (&head); } /*---------------------------------------------------------------------------*/ /* Calculate the Mann-Whitney statistic and shift parameter for a single voxel. */ void process_voxel ( int nvox, /* flag for voxel output */ int m, /* number of control subjects */ int n, /* number of treatment subjects */ float * xarray, /* array of control data (x data) */ float * yarray, /* array of treatment data (y data) */ float * delta_hat, /* estimated shift parameter */ float * zvar /* normalized Mann-Whitney statistic */ ) { int i; /* array index */ /*----- check for voxel output -----*/ if (nvox > 0) { printf ("\nResults for voxel #%d : \n", nvox); printf ("\n"); printf ("X data: \n"); for (i = 0; i < m; i++) { printf (" %6.1f", xarray[i]); if (((i+1) % 10 == 0) && (i < m-1)) printf ("\n"); } printf ("\n\n"); printf ("Y data: \n"); for (i = 0; i < n; i++) { printf (" %6.1f", yarray[i]); if (((i+1) % 10 == 0) && (i < n-1)) printf ("\n"); } printf ("\n"); } /*----- calculate normalized Mann-Whitney statistic -----*/ calc_stat (nvox, m, n, xarray, yarray, zvar); /*----- estimate shift parameter -----*/ calc_shift (nvox, m, n, xarray, yarray, delta_hat); } /*---------------------------------------------------------------------------*/ /* Calculate the Wilcoxon-Mann-Whitney rank-sum statistics for all voxels (by breaking the datasets into sub-volumes, if necessary). */ void calculate_results ( NP_options * option_data, /* user input options */ float * delta, /* estimated shift parameter */ float * zvar /* normalized Mann-Whitney statistic */ ) { int i; /* dataset index */ int m; /* control sample size */ int n; /* treatment sample size */ int nxyz; /* number of voxels per dataset */ int num_datasets; /* total number of datasets */ int piece_size; /* number of voxels in dataset sub-volume */ int num_pieces; /* dataset is divided into this many pieces */ int piece; /* piece index */ int piece_len; /* number of voxels in current sub-volume */ int fim_offset; /* array offset to current sub-volume */ int ivox; /* index to voxels in current sub-volume */ int nvox; /* index of voxel within entire volume */ float delta_hat; /* estimate of shift parameter */ float z; /* normalized Mann-Whitney statistic */ float ** xfimar; /* array of pieces of X-datasets */ float ** yfimar; /* array of pieces of Y-datasets */ float * xarray; /* array of control data (X-data) */ float * yarray; /* array of treatment data (Y-data) */ /*----- initialize local variables -----*/ m = option_data->m; n = option_data->n; nxyz = option_data->nxyz; num_datasets = m + n; /*----- break problem into smaller pieces -----*/ piece_size = option_data->workmem * MEGA / (num_datasets * sizeof(float)); if (piece_size > nxyz) piece_size = nxyz; num_pieces = (nxyz + piece_size - 1) / piece_size; printf ("num_pieces = %d piece_size = %d \n", num_pieces, piece_size); /*----- allocate memory space -----*/ xarray = (float *) malloc (sizeof(float) * m); MTEST(xarray); yarray = (float *) malloc (sizeof(float) * n); MTEST(yarray); xfimar = (float **) malloc (sizeof(float *) * m); MTEST(xfimar); yfimar = (float **) malloc (sizeof(float *) * n); MTEST(yfimar); for (i = 0; i < m; i++) { xfimar[i] = (float *) malloc(sizeof(float) * piece_size); MTEST(xfimar[i]); } for (i = 0; i < n; i++) { yfimar[i] = (float *) malloc(sizeof(float) * piece_size); MTEST(yfimar[i]); } /*----- loop over the pieces of the input datasets -----*/ nvox = 0; for (piece = 0; piece < num_pieces; piece++) { printf ("piece = %d \n", piece); fim_offset = piece * piece_size; if (piece < num_pieces-1) piece_len = piece_size; else piece_len = nxyz - fim_offset; /*----- read in the X-data -----*/ for (i = 0; i < m; i++) read_afni_data (option_data, option_data->xname[0][i], piece_len, fim_offset, xfimar[i]); /*----- read in the Y-data -----*/ for (i = 0; i < n; i++) read_afni_data (option_data, option_data->xname[1][i], piece_len, fim_offset, yfimar[i]); /*----- loop over voxels in this piece -----*/ for (ivox = 0; ivox < piece_len; ivox++) { nvox += 1; for (i = 0; i < m; i++) xarray[i] = xfimar[i][ivox]; for (i = 0; i < n; i++) yarray[i] = yfimar[i][ivox]; /*----- calculate results for this voxel -----*/ if (nvox == option_data->nvoxel) process_voxel (nvox, m, n, xarray, yarray, &delta_hat, &z); else process_voxel (-1, m, n, xarray, yarray, &delta_hat, &z); /*----- save results for this voxel -----*/ delta[ivox+fim_offset] = delta_hat; zvar[ivox+fim_offset] = z; } } /* loop over pieces */ /*----- deallocate memory -----*/ free (xarray); xarray = NULL; free (yarray); yarray = NULL; for (i = 0; i < m; i++) { free (xfimar[i]); xfimar[i] = NULL; } free (xfimar); xfimar = NULL; for (i = 0; i < n; i++) { free (yfimar[i]); yfimar[i] = NULL; } free (yfimar); yfimar = NULL; } /*---------------------------------------------------------------------------*/ /* Generate the requested output. */ void output_results ( int argc, /* number of input arguments */ char ** argv, /* array of input arguments */ NP_options * option_data, /* user input options */ float * delta, /* estimated shift parameter */ float * zvar /* normalized Mann-Whitney rank-sum statistic */ ) { /*----- write out afni fizt data file -----*/ write_afni_fizt (argc, argv, option_data, option_data->outfile, delta, zvar); } /*---------------------------------------------------------------------------*/ /* Routine to release memory and remove any remaining temporary data files. */ void terminate ( NP_options ** option_data, /* user input options */ float ** delta, /* estimated shift parameter */ float ** zvar /* normalized Mann-Whitney rank-sum statistic */ ) { int i, j; /* dataset indices */ /*----- deallocate memory -----*/ for (j = 0; j < (*option_data)->m; j++) { free ((*option_data)->xname[0][j]); (*option_data)->xname[0][j] = NULL; } for (j = 0; j < (*option_data)->n; j++) { free ((*option_data)->xname[1][j]); (*option_data)->xname[1][j] = NULL; } for (i = 0; i < 2; i++) { free ((*option_data)->xname[i]); (*option_data)->xname[i] = NULL; } free ((*option_data)->xname); (*option_data)->xname = NULL; if ((*option_data)->outfile != NULL) { free ((*option_data)-> outfile); (*option_data)->outfile = NULL; } free (*option_data); *option_data = NULL; free (*delta); *delta = NULL; free (*zvar); *zvar = NULL; } /*---------------------------------------------------------------------------*/ /* Perform nonparametric Wilcoxon-Mann-Whitney rank-sum two sample test. */ int main (int argc, char ** argv) { NP_options * option_data = NULL; /* user input options */ float * delta; /* estimated shift parameter */ float * zvar; /* normalized Mann-Whitney statistic */ /*----- Identify software -----*/ #if 0 printf ("\n\n"); printf ("Program: %s \n", PROGRAM_NAME); printf ("Author: %s \n", PROGRAM_AUTHOR); printf ("Initial Release: %s \n", PROGRAM_INITIAL); printf ("Latest Revision: %s \n", PROGRAM_LATEST); printf ("\n"); #endif /*-- 20 Apr 2001: addto the arglist, if user wants to [RWCox] --*/ PRINT_VERSION("3dMannWhitney") ; AUTHOR(PROGRAM_AUTHOR); mainENTRY("3dMannWhitney main") ; machdep() ; { int new_argc ; char ** new_argv ; addto_args( argc , argv , &new_argc , &new_argv ) ; if( new_argv != NULL ){ argc = new_argc ; argv = new_argv ; } } /*----- program initialization -----*/ initialize (argc, argv, &option_data, &delta, &zvar); /*----- calculate nonparameteric Mann-Whitney statistics -----*/ calculate_results (option_data, delta, zvar); /*----- generate requested output -----*/ output_results (argc, argv, option_data, delta, zvar); /*----- terminate program -----*/ terminate (&option_data, &delta, &zvar); exit(0); }