#ifdef SPARKY
#undef _POSIX_SOURCE
#endif
#include <sys/types.h>
#include <sys/time.h>
#include <sys/errno.h>
#include <sys/times.h>
#include <limits.h>

#include "mrilib.h"

#define FFT_LEN  128
#define FFT_POW  7
#define FFT_NUM  100000
#define MAX_NVEC 256

double COX_cpu_time(void) ;

int main( int argc , char * argv[] )
{
   int nvec , kk , ii , nvec_now , itop , iarg , fft_len,fft_pow,fft_ops,fft_num,max_nvec ;
   complex * cx ;
   float fac ;
   double cptime , mflops ;

   if( argc > 1 && strncmp(argv[1],"-help",5) == 0 ){
      printf("Usage: csfft_cache [-len #] [-num #] [-maxvec #]\n"
             "* Does lots of FFTs and determines how many at once are efficient.\n"
             "Options:\n"
             "  -len #     sets FFT length to '#' (power of 2); default = 128\n"
             "  -num #     number of FFTs to do for timing;  default = 100000\n"
             "  -maxvec #  maximum number of FFTs to do at once; default = 256\n"
            ) ;
      exit(0) ;
   }

   fft_len  = FFT_LEN ;
   fft_pow  = FFT_POW ;
   fft_num  = FFT_NUM ;
   max_nvec = MAX_NVEC ;
   iarg = 1 ;
   while( iarg < argc && argv[iarg][0] == '-' ){
      if( strncmp(argv[iarg],"-len",4) == 0 ){
         fft_len = strtol( argv[++iarg] , NULL , 10 ) ;
         if( fft_len < 4 ){fprintf(stderr,"illegal -len\a\n");exit(1);}
         for( ii=2,kk=4 ; kk < fft_len ; kk *= 2,ii++ ) ; /* nada */
         if( kk != fft_len ){fprintf(stderr,"illegal -len\a\n");exit(1);}
         fft_pow = ii ;
         iarg++ ; continue ;
      }

      if( strncmp(argv[iarg],"-num",4) == 0 ){
         fft_num = strtol( argv[++iarg] , NULL , 10 ) ;
         if( fft_num < 1 ){fprintf(stderr,"illegal -num\a\n");exit(1);}
         iarg++ ; continue ;
      }

      if( strncmp(argv[iarg],"-maxvec",5) == 0 ){
         max_nvec = strtol( argv[++iarg] , NULL , 10 ) ;
         if( max_nvec < 2 ){fprintf(stderr,"illegal -maxvec\a\n");exit(1);}
         iarg++ ; continue ;
      }
   }
   fft_ops = (5*fft_pow+2)*fft_len ;
   fac = sqrt(1.0/fft_len) ;
  
   cx = (complex *) malloc( sizeof(complex) * fft_len * max_nvec ) ;
   for( ii=0 ; ii < fft_len ; ii++ ){ cx[ii].r = ii ; cx[ii].i = -0.3*ii ; }

   csfft_cox( -1 , fft_len , cx ) ;  /* to initialize trig constants */

   for( nvec=1 ; nvec <= max_nvec ; nvec = (nvec<4) ? (nvec+1) : (1.414*nvec+0.5) ){
      for( ii=0 ; ii < fft_len*nvec ; ii++ ){ cx[ii].r = ii ; cx[ii].i = -0.3*ii ; }

      cptime = COX_cpu_time() ;
#if 0
      if( nvec == 1 ){
#else
      if( nvec == 0 ){
#endif
         for( kk=0 ; kk < fft_num ; kk++ ){
            csfft_cox( -1 , fft_len , cx ) ;
            for( ii=0 ; ii < fft_len ; ii++ ){ cx[ii].r *= fac ; cx[ii].i *= fac ; }
         }
      } else {
         int nvec_now ;
         for( kk=0 ; kk < fft_num ; kk+=nvec ){
            nvec_now = fft_num - kk ; if( nvec_now > nvec ) nvec_now = nvec ;
            csfft_many( -1 , fft_len , nvec_now , cx ) ;
            itop = fft_len*nvec_now ;
            for( ii=0 ; ii < itop ; ii++ ){ cx[ii].r *= fac ; cx[ii].i *= fac ; }
         }
      }
      cptime = COX_cpu_time() - cptime ;
      mflops = fft_num * fft_ops / ( 1.e6 * cptime ) ;
      printf("FFT cache size %6d bytes gives %10.2f Mflops"
             " (%d FFTs, size %d, %d at a time, in %g sec)\n",
             sizeof(complex)*fft_len*nvec , mflops , fft_num,fft_len,nvec,cptime) ;
   }

   exit(0) ;
}

double COX_cpu_time(void)  /* in seconds */
{
   struct tms ttt ;
   (void) times( &ttt ) ;
   return (  (double) (ttt.tms_utime) / (double) CLK_TCK ) ;
}


syntax highlighted by Code2HTML, v. 0.9.1