#include "mrilib.h"

/*------------------------------------------------------------
  Set the one-sided tail probability at which we will cutoff
  the unusuality test.
--------------------------------------------------------------*/

static float zstar = 0.0 ;            /* the actual cutoff */
static float pstar = 0.0 ;            /* tail probability  */

void set_unusuality_tail( float p )
{
   if( p > 0.0 && p < 1.0 ){
      zstar = qginv(p) ;
      pstar = p ;
   }
   return ;
}

/*------------------------------------------------------------
  Inputs: rr[0..nr-1] = array of correlation coefficients.
--------------------------------------------------------------*/

#undef TANHALL

float unusuality( int nr , float * rr )
{
   int ii , nzero , mzero ;
   float * zz , * aa ;
   float zmid,zsig,zmed, uval, fac, zrat, ff,fp, ss,ds,pp,ee , sigma ;
#ifndef TANHALL
   float rmid , rcut ;
#endif

   if( nr < 1000 || rr == NULL ) return 0.0 ;

   /*-- make workspace --*/

   zz = (float *) malloc(sizeof(float)*nr*2) ; aa = zz + nr ;

   if( zstar <= 0.0 ){
      char * cp = getenv("PTAIL") ;
      float pp = 0.01 ;
      if( cp != NULL ){
         float xx = strtod( cp , NULL ) ;
         if( xx > 0.0 && xx < 1.0 ) pp = xx ;
      }
      set_unusuality_tail( pp ) ;
   }

   /*-- copy data into workspace, converting to atanh --*/

   memcpy( zz , rr , sizeof(float)*nr ) ;
   qsort_float( nr , zz ) ;                           /* sort now */

   /*- trim off 1's (perfect correlations) -*/

   for( ii=nr-1 ; ii > 0 && zz[ii] > 0.999 ; ii-- ) ; /* nada */
   if( ii == 0 ){ free(zz) ; return 0.0 ; }           /* shouldn't happen */
   nr = ii+1 ;                                        /* the trim */

#ifdef TANHALL
   for( ii=0 ; ii < nr ; ii++ ) zz[ii] = atanh(rr[ii]) ;
#endif

   /*-- find median of zz [brute force sort] --*/

   if( nr%2 == 1 )              /* median */
      zmid = zz[nr/2] ;
   else
      zmid = 0.5 * ( zz[nr/2] + zz[nr/2-1] ) ;

#ifdef TANHALL
   for( ii=0 ; ii < nr ; ii++ ) aa[ii] = fabs(zz[ii]-zmid) ;
#else
   rmid = zmid ; zmid = atanh(zmid) ;
   for( ii=0 ; ii < nr ; ii++ )
      aa[ii] = fabs( (zz[ii]-rmid)/(1.0-zz[ii]*rmid) ) ;
#endif

   /*-- find MAD of zz --*/

   qsort_float( nr , aa ) ;
   if( nr%2 == 1 )              /* MAD = median absolute deviation */
      zmed = aa[nr/2] ;
   else
      zmed = 0.5 * ( aa[nr/2] + aa[nr/2-1] ) ;

#ifndef TANHALL
   zmed = atanh(zmed) ;
#endif

   zsig = 1.4826 * zmed ;           /* estimate standard deviation of zz */
                                    /* 1/1.4826 = sqrt(2)*erfinv(0.5)    */

   if( zsig <= 0.0 ){               /* should not happen */
      free(zz) ; return 0.0 ;
   }

   /*-- normalize zz (is already sorted) --*/
   /*-- then, find values >= zstar       --*/

   fac = 1.0 / zsig ;
#ifdef TANHALL
   for( ii=0 ; ii < nr ; ii++ ) zz[ii] = fac * ( zz[ii] - zmid ) ;
   for( ii=nr-1 ; ii > 0 ; ii-- ) if( zz[ii] < zstar ) break ;
   nzero = ii+1 ; mzero = nr - nzero ;
#else
   rcut = tanh( zsig * zstar + zmid ) ;
   for( ii=nr-1 ; ii > 0 ; ii-- ){
      if( zz[ii] < rcut ) break ;
      else                zz[ii] = fac * ( atanh(zz[ii]) - zmid ) ;
   }
   nzero = ii+1 ; mzero = nr - nzero ;

#if 0
   fprintf(stderr,"uuu: nr=%d rcut=%g mzero=%d\n",nr,rcut,mzero) ;
#endif
#endif

   if( nzero < 2 || mzero < MAX(1.0,pstar*nr) ){          /* too weird */
      free(zz) ; return 0.0 ;
   }

   /*-- compute sigma-tilde squared --*/

   zsig = 0.0 ;
   for( ii=nzero ; ii < nr ; ii++ ) zsig += zz[ii]*zz[ii] ;
   zsig = zsig / mzero ;

   /*-- set up to compute f(s) --*/

#define SQRT_2PI 2.5066283

   zrat = zstar*zstar / zsig ;
   fac  = ( zrat * nzero ) / ( SQRT_2PI * mzero ) ;

   ss   = zstar ;          /* initial guess for s = zstar/sigma */

   /*-- Newton's method [almost] --*/

#undef  PHI
#define PHI(s) (1.0-0.5*normal_t2p(ss))           /* N(0,1) cdf */

   for( ii=0 ; ii < 5 ; ii++ ){
      pp = PHI(ss) ;                              /* Phi(ss) \approx 1 */
      ee = exp(-0.5*ss*ss) ;

      ff = ss*ss - (fac/pp) * ss * ee - zrat ;    /* f(s) */

      fp = 2.0*ss + (fac/pp) * ee * (ss*ss-1.0) ; /* f'(s) */

      ds = ff / fp ;                              /* Newton step */

#if 0
      fprintf(stderr,"Newton: ss=%g ds=%g ff=%g fp=%g pp=%g\n",ss,ds,ff,fp,pp) ;
#endif

      ss -= ds ;                                  /* update */
   }

   sigma = zstar / ss ;                           /* actual estimate of sigma */
                                                  /* from the upper tail data */

   if( sigma <= 1.0 ){                            /* the boring case */
      free(zz) ; return 0.0 ;
   }

   /*-- compute the log-likelihood difference next --*/

   uval =  nzero * log( PHI(ss)/(1.0-pstar) )
         - mzero * ( log(sigma) + 0.5 * zsig * (1.0/(sigma*sigma)-1.0) ) ;

   /*-- done! --*/

   free(zz) ; return uval ;
}


syntax highlighted by Code2HTML, v. 0.9.1