// Copyright 2003 Regents of the University of California


// SETI_BOINC is free software; you can redistribute it and/or modify it under

// the terms of the GNU General Public License as published by the Free

// Software Foundation; either version 2, or (at your option) any later

// version.


// SETI_BOINC is distributed in the hope that it will be useful, but WITHOUT

// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for

// more details.


// Copyright 2003 Regents of the University of California


// SETI_BOINC is free software; you can redistribute it and/or modify it under

// the terms of the GNU General Public License as published by the Free

// Software Foundation; either version 2, or (at your option) any later

// version.


// SETI_BOINC is distributed in the hope that it will be useful, but WITHOUT

// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for

// more details.


// You should have received a copy of the GNU General Public License along

// with SETI_BOINC; see the file COPYING.  If not, write to the Free Software

// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

// In addition, as a special exception, the Regents of the University of

// California give permission to link the code of this program with libraries

// that provide specific optimized fast Fourier transform (FFT) functions and

// distribute a linked executable.  You must obey the GNU General Public 

// License in all respects for all of the code used other than the FFT library

// itself.  Any modification required to support these libraries must be

// distributed in source code form.  If you modify this file, you may extend 

// this exception to your version of the file, but you are not obligated to 

// do so. If you do not wish to do so, delete this exception statement from 

// your version.



/*
 * $Id: pulsefind.cpp,v 1.16.2.15 2007/05/31 22:03:10 korpela Exp $
 *
 */

#include "sah_config.h"


#include <cstdio>

#include <cstdlib>

#include <cmath>

#include <climits>

#include <cstring>

#include <ctime>


#include "diagnostics.h"

#include "util.h"

#include "s_util.h"

#include "boinc_api.h"

#ifdef BOINC_APP_GRAPHICS

#include "graphics_api.h"

#ifdef DYNAMIC_GRAPHICS

#include "graphics_lib.h"

#endif

#endif


#include "seti.h"

#include "seti_header.h"

#include "analyzePoT.h"

#include "analyzeReport.h"

#include "worker.h"

#include "malloc_a.h"

#include "lcgamm.h"

#include "pulsefind.h"


//#define DEBUG_TRIPLET_VERBOSE

//#define DEBUG_PULSE_VERBOSE

//#define DEBUG_PULSE_BOUNDS



/**********************
 *
 *   find_triplets - Analyzes the input signal for a triplet signal.
 *   
 *   Written by Eric Heien.
 */
int find_triplets( const float *power, int len_power, float triplet_thresh, int time_bin, int freq_bin ) {
  static int    *binsAboveThreshold;
  int            i,n,numBinsAboveThreshold=0,p,q;
  float        midpoint,mean_power=0,peak_power,period;

  if (!binsAboveThreshold) {
    binsAboveThreshold=(int *)malloc_a(PoTInfo.TripletMax*sizeof(int),MEM_ALIGN);
    if (!binsAboveThreshold) SETIERROR(MALLOC_FAILED, "!binsAboveThreshold");
  }

  /* Get all the bins that are above the threshold, and find the power array mean value */
#ifdef DEBUG_TRIPLET_VERBOSE

  fprintf(stderr, "In find_triplets()...   PulsePotLen = %d   triplet_thresh = %f   TOffset = %d   PoT = %d\n",  len_power,  triplet_thresh, time_bin, freq_bin);
#endif


  for( i=0;i<len_power;i++ ) {
    mean_power += power[i];
  }
  mean_power /= (float)len_power;
  triplet_thresh*=mean_power;
  for( i=0;i<len_power;i++ ) {
    if( power[i] >= triplet_thresh ) {
      binsAboveThreshold[numBinsAboveThreshold] = i;
      numBinsAboveThreshold++;
    }
  }
  analysis_state.FLOP_counter+=10.0+len_power;

  /* Check each bin combination for a triplet */
  if (numBinsAboveThreshold>2) {
    for( i=0;i<numBinsAboveThreshold-1;i++ ) {
      for( n=i+1;n<numBinsAboveThreshold;n++ ) {
        midpoint = (binsAboveThreshold[i]+binsAboveThreshold[n])/2.0f;
        period = (float)fabs((binsAboveThreshold[i]-binsAboveThreshold[n])/2.0f);

        /* Get the peak power of this triplet */
        peak_power = power[binsAboveThreshold[i]];

        if( power[binsAboveThreshold[n]] > peak_power )
          peak_power = power[binsAboveThreshold[n]];

        p = binsAboveThreshold[i];
        while( (power[p] >= triplet_thresh) && (p <= (static_cast<int>(floor(midpoint)))) ) {    /* Check if there is a pulse "off" in between init and midpoint */
          p++;
        }

        q = static_cast<int>(floor(midpoint))+1;
        while( (power[q] >= triplet_thresh) && (q <= binsAboveThreshold[n])) {    /* Check if there is a pulse "off" in between midpoint and end */
          q++;
        }

        if( p >= static_cast<int>(floor(midpoint)) || q >= binsAboveThreshold[n]) {
          /* if this pulse doesn't have an "off" between all the three spikes, it's dropped */
        } else if( (midpoint - floor(midpoint)) > 0.1f ) {    /* if it's spread among two bins */
          if( power[(int)midpoint] >= triplet_thresh ) {
            if( power[(int)midpoint] > peak_power )
              peak_power = power[(int)midpoint];

            ReportTripletEvent( peak_power/mean_power, mean_power, period, midpoint,time_bin, freq_bin, len_power, power, 1 );
          }

          if( power[(int)(midpoint+1.0f)] >= triplet_thresh ) {
            if( power[(int)(midpoint+1.0f)] > peak_power )
              peak_power = power[(int)(midpoint+1.0f)];

            ReportTripletEvent( peak_power/mean_power, mean_power, period, midpoint,time_bin, freq_bin, len_power, power, 1 );
          }
        } else {            /* otherwise just check the single midpoint bin */
          if( power[(int)midpoint] >= triplet_thresh ) {
            if( power[(int)midpoint] > peak_power )
              peak_power = power[(int)midpoint];

            ReportTripletEvent( peak_power/mean_power, mean_power, period, midpoint,time_bin, freq_bin, len_power, power, 1 );
          }
        }
      }
    }
  }
  analysis_state.FLOP_counter+=(10.0*numBinsAboveThreshold*numBinsAboveThreshold);
  return (0);
}



/**********************
 *
 * Pulse finding
 *
 *
 * Default folding subroutines, FPU optimized
 *
 */
float sw_sum3_t31(float *ss[], struct PoTPlan *P) {
  float sum1, sum2, tmax2, tmax1;
  int i = P->di;
  float *one   = ss[0];
  float *two   = ss[0]+P->tmp0;
  float *three = ss[0]+P->tmp1;
  tmax2 = tmax1 = float(0.0);

  if ( i & 1 ) {
    i -= 1;
    tmax1 = one[i] + two[i];
    tmax1 += three[i];
    P->dest[i] = tmax1;
  }
  switch (i) {
    case 30:
      sum1 = one[29] + two[29];           sum2 = one[28] + two[28];
      sum1 += three[29];                  sum2 += three[28];
      P->dest[29] = sum1;                 P->dest[28] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 28:
      sum1 = one[27] + two[27];           sum2 = one[26] + two[26];
      sum1 += three[27];                  sum2 += three[26];
      P->dest[27] = sum1;                 P->dest[26] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 26:
      sum1 = one[25] + two[25];           sum2 = one[24] + two[24];
      sum1 += three[25];                  sum2 += three[24];
      P->dest[25] = sum1;                 P->dest[24] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 24:
      sum1 = one[23] + two[23];           sum2 = one[22] + two[22];
      sum1 += three[23];                  sum2 += three[22];
      P->dest[23] = sum1;                 P->dest[22] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 22:
      sum1 = one[21] + two[21];           sum2 = one[20] + two[20];
      sum1 += three[21];                  sum2 += three[20];
      P->dest[21] = sum1;                 P->dest[20] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 20:
      sum1 = one[19] + two[19];           sum2 = one[18] + two[18];
      sum1 += three[19];                  sum2 += three[18];
      P->dest[19] = sum1;                 P->dest[18] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 18:
      sum1 = one[17] + two[17];           sum2 = one[16] + two[16];
      sum1 += three[17];                  sum2 += three[16];
      P->dest[17] = sum1;                 P->dest[16] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 16:
      sum1 = one[15] + two[15];           sum2 = one[14] + two[14];
      sum1 += three[15];                  sum2 += three[14];
      P->dest[15] = sum1;                 P->dest[14] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 14:
      sum1 = one[13] + two[13];           sum2 = one[12] + two[12];
      sum1 += three[13];                  sum2 += three[12];
      P->dest[13] = sum1;                 P->dest[12] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 12:
      sum1 = one[11] + two[11];           sum2 = one[10] + two[10];
      sum1 += three[11];                  sum2 += three[10];
      P->dest[11] = sum1;                 P->dest[10] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 10:
      sum1 = one[9] + two[9];             sum2 = one[8] + two[8];
      sum1 += three[9];                   sum2 += three[8];
      P->dest[9] = sum1;                  P->dest[8] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 8:
      sum1 = one[7] + two[7];             sum2 = one[6] + two[6];
      sum1 += three[7];                   sum2 += three[6];
      P->dest[7] = sum1;                  P->dest[6] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 6:
      sum1 = one[5] + two[5];             sum2 = one[4] + two[4];
      sum1 += three[5];                   sum2 += three[4];
      P->dest[5] = sum1;                  P->dest[4] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 4:
      sum1 = one[3] + two[3];             sum2 = one[2] + two[2];
      sum1 += three[3];                   sum2 += three[2];
      P->dest[3] = sum1;                  P->dest[2] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 2:
      sum1 = one[1] + two[1];             sum2 = one[0] + two[0];
      sum1 += three[1];                   sum2 += three[0];
      P->dest[1] = sum1;                  P->dest[0] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
  }
  if (tmax1 > tmax2) return tmax1;
  return tmax2;
}
//

// use for higher sum3 folds

//

float top_sum3(float *ss[], struct PoTPlan *P) {
  float sum1, sum2, tmaxB, tmax;
  int   i;
  float *one = ss[0];
  float *two = ss[0]+P->tmp0;
  float *three = ss[0]+P->tmp1;
  tmaxB = tmax = float(0.0);

  for (i = 0 ; i < P->di-21; i += 22) {
    sum1 = one[i+0] + two[i+0];         sum2 = one[i+1] + two[i+1];
    sum1 += three[i+0];                 sum2 += three[i+1];
    P->dest[i+0] = sum1;                P->dest[i+1] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+2] + two[i+2];         sum2 = one[i+3] + two[i+3];
    sum1 += three[i+2];                 sum2 += three[i+3];
    P->dest[i+2] = sum1;                P->dest[i+3] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+4] + two[i+4];         sum2 = one[i+5] + two[i+5];
    sum1 += three[i+4];                 sum2 += three[i+5];
    P->dest[i+4] = sum1;                P->dest[i+5] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+6] + two[i+6];         sum2 = one[i+7] + two[i+7];
    sum1 += three[i+6];                 sum2 += three[i+7];
    P->dest[i+6] = sum1;                P->dest[i+7] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+8] + two[i+8];         sum2 = one[i+9] + two[i+9];
    sum1 += three[i+8];                 sum2 += three[i+9];
    P->dest[i+8] = sum1;                P->dest[i+9] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+10] + two[i+10];       sum2 = one[i+11] + two[i+11];
    sum1 += three[i+10];                sum2 += three[i+11];
    P->dest[i+10] = sum1;               P->dest[i+11] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+12] + two[i+12];       sum2 = one[i+13] + two[i+13];
    sum1 += three[i+12];                sum2 += three[i+13];
    P->dest[i+12] = sum1;               P->dest[i+13] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+14] + two[i+14];       sum2 = one[i+15] + two[i+15];
    sum1 += three[i+14];                sum2 += three[i+15];
    P->dest[i+14] = sum1;               P->dest[i+15] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+16] + two[i+16];       sum2 = one[i+17] + two[i+17];
    sum1 += three[i+16];                sum2 += three[i+17];
    P->dest[i+16] = sum1;               P->dest[i+17] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+18] + two[i+18];       sum2 = one[i+19] + two[i+19];
    sum1 += three[i+18];                sum2 += three[i+19];
    P->dest[i+18] = sum1;               P->dest[i+19] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+20] + two[i+20];       sum2 = one[i+21] + two[i+21];
    sum1 += three[i+20];                sum2 += three[i+21];
    P->dest[i+20] = sum1;               P->dest[i+21] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;
  }
  for (   ; i < P->di; i++) {
    sum1 = one[i] + two[i] + three[i];
    P->dest[i] = sum1;
    if (sum1 > tmax) tmax = sum1;
  }
  if (tmax > tmaxB) return tmax;
  return tmaxB;
}

sum_func swi3tb[FOLDTBLEN] = {
  sw_sum3_t31, sw_sum3_t31, sw_sum3_t31, sw_sum3_t31,
  sw_sum3_t31, sw_sum3_t31, sw_sum3_t31, sw_sum3_t31,
  sw_sum3_t31, sw_sum3_t31, sw_sum3_t31, sw_sum3_t31,
  sw_sum3_t31, sw_sum3_t31, sw_sum3_t31, sw_sum3_t31,
  sw_sum3_t31, sw_sum3_t31, sw_sum3_t31, sw_sum3_t31,
  sw_sum3_t31, sw_sum3_t31, sw_sum3_t31, sw_sum3_t31,
  sw_sum3_t31, sw_sum3_t31, sw_sum3_t31, sw_sum3_t31,
  sw_sum3_t31, sw_sum3_t31, sw_sum3_t31, top_sum3
};
//

//

float sw_sum4_t31(float *ss[], struct PoTPlan *P) {
  float sum1, sum2, tmax2, tmax1;
  int i = P->di;
  float *one   = ss[0];
  float *two   = ss[0]+P->tmp0;
  float *three = ss[0]+P->tmp1;
  float *four  = ss[0]+P->tmp2;
  tmax2 = tmax1 = float(0.0);

  if ( i & 1 ) {
    i -= 1;
    tmax1 = one[i] + two[i];            sum2 = three[i] + four[i];
    tmax1 += sum2;
    P->dest[i] = tmax1;
  }
  switch (i) {
    case 30:
      sum1 = one[29] + two[29];           sum2 = one[28] + two[28];
      sum1 += three[29];                  sum2 += three[28];
      sum1 += four[29];                   sum2 += four[28];
      P->dest[29] = sum1;                 P->dest[28] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 28:
      sum1 = one[27] + two[27];           sum2 = one[26] + two[26];
      sum1 += three[27];                  sum2 += three[26];
      sum1 += four[27];                   sum2 += four[26];
      P->dest[27] = sum1;                 P->dest[26] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 26:
      sum1 = one[25] + two[25];           sum2 = one[24] + two[24];
      sum1 += three[25];                  sum2 += three[24];
      sum1 += four[25];                   sum2 += four[24];
      P->dest[25] = sum1;                 P->dest[24] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 24:
      sum1 = one[23] + two[23];           sum2 = one[22] + two[22];
      sum1 += three[23];                  sum2 += three[22];
      sum1 += four[23];                   sum2 += four[22];
      P->dest[23] = sum1;                 P->dest[22] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 22:
      sum1 = one[21] + two[21];           sum2 = one[20] + two[20];
      sum1 += three[21];                  sum2 += three[20];
      sum1 += four[21];                   sum2 += four[20];
      P->dest[21] = sum1;                 P->dest[20] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 20:
      sum1 = one[19] + two[19];           sum2 = one[18] + two[18];
      sum1 += three[19];                  sum2 += three[18];
      sum1 += four[19];                   sum2 += four[18];
      P->dest[19] = sum1;                 P->dest[18] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 18:
      sum1 = one[17] + two[17];           sum2 = one[16] + two[16];
      sum1 += three[17];                  sum2 += three[16];
      sum1 += four[17];                   sum2 += four[16];
      P->dest[17] = sum1;                 P->dest[16] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 16:
      sum1 = one[15] + two[15];           sum2 = one[14] + two[14];
      sum1 += three[15];                  sum2 += three[14];
      sum1 += four[15];                   sum2 += four[14];
      P->dest[15] = sum1;                 P->dest[14] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 14:
      sum1 = one[13] + two[13];           sum2 = one[12] + two[12];
      sum1 += three[13];                  sum2 += three[12];
      sum1 += four[13];                   sum2 += four[12];
      P->dest[13] = sum1;                 P->dest[12] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 12:
      sum1 = one[11] + two[11];           sum2 = one[10] + two[10];
      sum1 += three[11];                  sum2 += three[10];
      sum1 += four[11];                   sum2 += four[10];
      P->dest[11] = sum1;                 P->dest[10] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 10:
      sum1 = one[9] + two[9];             sum2 = one[8] + two[8];
      sum1 += three[9];                   sum2 += three[8];
      sum1 += four[9];                    sum2 += four[8];
      P->dest[9] = sum1;                  P->dest[8] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 8:
      sum1 = one[7] + two[7];             sum2 = one[6] + two[6];
      sum1 += three[7];                   sum2 += three[6];
      sum1 += four[7];                    sum2 += four[6];
      P->dest[7] = sum1;                  P->dest[6] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 6:
      sum1 = one[5] + two[5];             sum2 = one[4] + two[4];
      sum1 += three[5];                   sum2 += three[4];
      sum1 += four[5];                    sum2 += four[4];
      P->dest[5] = sum1;                  P->dest[4] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 4:
      sum1 = one[3] + two[3];             sum2 = one[2] + two[2];
      sum1 += three[3];                   sum2 += three[2];
      sum1 += four[3];                    sum2 += four[2];
      P->dest[3] = sum1;                  P->dest[2] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 2:
      sum1 = one[1] + two[1];             sum2 = one[0] + two[0];
      sum1 += three[1];                   sum2 += three[0];
      sum1 += four[1];                    sum2 += four[0];
      P->dest[1] = sum1;                  P->dest[0] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
  }
  if (tmax1 > tmax2) return tmax1;
  return tmax2;
}
//

// use for higher sum4 folds

//

float top_sum4(float *ss[], struct PoTPlan *P) {
  float        sum1, sum2, tmaxB, tmax;
  int i;
  float *one = ss[0];
  float *two = ss[0]+P->tmp0;
  float *three = ss[0]+P->tmp1;
  float *four = ss[0]+P->tmp2;
  tmaxB = tmax = float(0.0);

  for (i = 0 ; i < P->di-15; i += 16) {
    sum1 = one[i+0] + two[i+0];         sum2 = one[i+1] + two[i+1];
    sum1 += three[i+0];                 sum2 += three[i+1];
    sum1 += four[i+0];                  sum2 += four[i+1];
    P->dest[i+0] = sum1;                P->dest[i+1] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+2] + two[i+2];         sum2 = one[i+3] + two[i+3];
    sum1 += three[i+2];                 sum2 += three[i+3];
    sum1 += four[i+2];                  sum2 += four[i+3];
    P->dest[i+2] = sum1;                P->dest[i+3] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+4] + two[i+4];         sum2 = one[i+5] + two[i+5];
    sum1 += three[i+4];                 sum2 += three[i+5];
    sum1 += four[i+4];                  sum2 += four[i+5];
    P->dest[i+4] = sum1;                P->dest[i+5] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+6] + two[i+6];         sum2 = one[i+7] + two[i+7];
    sum1 += three[i+6];                 sum2 += three[i+7];
    sum1 += four[i+6];                  sum2 += four[i+7];
    P->dest[i+6] = sum1;                P->dest[i+7] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+8] + two[i+8];         sum2 = one[i+9] + two[i+9];
    sum1 += three[i+8];                 sum2 += three[i+9];
    sum1 += four[i+8];                  sum2 += four[i+9];
    P->dest[i+8] = sum1;                P->dest[i+9] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+10] + two[i+10];       sum2 = one[i+11] + two[i+11];
    sum1 += three[i+10];                sum2 += three[i+11];
    sum1 += four[i+10];                 sum2 += four[i+11];
    P->dest[i+10] = sum1;               P->dest[i+11] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+12] + two[i+12];       sum2 = one[i+13] + two[i+13];
    sum1 += three[i+12];                sum2 += three[i+13];
    sum1 += four[i+12];                 sum2 += four[i+13];
    P->dest[i+12] = sum1;               P->dest[i+13] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+14] + two[i+14];       sum2 = one[i+15] + two[i+15];
    sum1 += three[i+14];                sum2 += three[i+15];
    sum1 += four[i+14];                 sum2 += four[i+15];
    P->dest[i+14] = sum1;               P->dest[i+15] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;
  }
  for (   ; i < P->di; i++) {
    sum1 = (one[i] + two[i] );          sum2 = (three[i] + four[i] );
    sum1 += sum2;
    P->dest[i] = sum1;
    if (sum1 > tmax) tmax = sum1;
  }
  if (tmax > tmaxB) return tmax;
  return tmaxB;
}
//

sum_func swi4tb[FOLDTBLEN] = {
  sw_sum4_t31, sw_sum4_t31, sw_sum4_t31, sw_sum4_t31,
  sw_sum4_t31, sw_sum4_t31, sw_sum4_t31, sw_sum4_t31,
  sw_sum4_t31, sw_sum4_t31, sw_sum4_t31, sw_sum4_t31,
  sw_sum4_t31, sw_sum4_t31, sw_sum4_t31, sw_sum4_t31,
  sw_sum4_t31, sw_sum4_t31, sw_sum4_t31, sw_sum4_t31,
  sw_sum4_t31, sw_sum4_t31, sw_sum4_t31, sw_sum4_t31,
  sw_sum4_t31, sw_sum4_t31, sw_sum4_t31, sw_sum4_t31,
  sw_sum4_t31, sw_sum4_t31, sw_sum4_t31, top_sum4
};
//

//

float sw_sum5_t31(float *ss[], struct PoTPlan *P) {
  float sum1, sum2, tmax2, tmax1;
  int i = P->di;
  float *one   = ss[0];
  float *two   = ss[0]+P->tmp0;
  float *three = ss[0]+P->tmp1;
  float *four  = ss[0]+P->tmp2;
  float *five  = ss[0]+P->tmp3;
  tmax2 = tmax1 = float(0.0);

  if ( i & 1 ) {
    i -= 1;
    tmax1 = one[i] + two[i];            sum2 = three[i] + four[i];
    tmax1 += five[i];
    tmax1 += sum2;
    P->dest[i] = tmax1;
  }
  switch (i) {
    case 30:
      sum1 = one[29] + two[29];           sum2 = one[28] + two[28];
      sum1 += three[29];                  sum2 += three[28];
      sum1 += four[29];                   sum2 += four[28];
      sum1 += five[29];                   sum2 += five[28];
      P->dest[29] = sum1;                 P->dest[28] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 28:
      sum1 = one[27] + two[27];           sum2 = one[26] + two[26];
      sum1 += three[27];                  sum2 += three[26];
      sum1 += four[27];                   sum2 += four[26];
      sum1 += five[27];                   sum2 += five[26];
      P->dest[27] = sum1;                 P->dest[26] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 26:
      sum1 = one[25] + two[25];           sum2 = one[24] + two[24];
      sum1 += three[25];                  sum2 += three[24];
      sum1 += four[25];                   sum2 += four[24];
      sum1 += five[25];                   sum2 += five[24];
      P->dest[25] = sum1;                 P->dest[24] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 24:
      sum1 = one[23] + two[23];           sum2 = one[22] + two[22];
      sum1 += three[23];                  sum2 += three[22];
      sum1 += four[23];                   sum2 += four[22];
      sum1 += five[23];                   sum2 += five[22];
      P->dest[23] = sum1;                 P->dest[22] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 22:
      sum1 = one[21] + two[21];           sum2 = one[20] + two[20];
      sum1 += three[21];                  sum2 += three[20];
      sum1 += four[21];                   sum2 += four[20];
      sum1 += five[21];                   sum2 += five[20];
      P->dest[21] = sum1;                 P->dest[20] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 20:
      sum1 = one[19] + two[19];           sum2 = one[18] + two[18];
      sum1 += three[19];                  sum2 += three[18];
      sum1 += four[19];                   sum2 += four[18];
      sum1 += five[19];                   sum2 += five[18];
      P->dest[19] = sum1;                 P->dest[18] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 18:
      sum1 = one[17] + two[17];           sum2 = one[16] + two[16];
      sum1 += three[17];                  sum2 += three[16];
      sum1 += four[17];                   sum2 += four[16];
      sum1 += five[17];                   sum2 += five[16];
      P->dest[17] = sum1;                 P->dest[16] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 16:
      sum1 = one[15] + two[15];           sum2 = one[14] + two[14];
      sum1 += three[15];                  sum2 += three[14];
      sum1 += four[15];                   sum2 += four[14];
      sum1 += five[15];                   sum2 += five[14];
      P->dest[15] = sum1;                 P->dest[14] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 14:
      sum1 = one[13] + two[13];           sum2 = one[12] + two[12];
      sum1 += three[13];                  sum2 += three[12];
      sum1 += four[13];                   sum2 += four[12];
      sum1 += five[13];                   sum2 += five[12];
      P->dest[13] = sum1;                 P->dest[12] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 12:
      sum1 = one[11] + two[11];           sum2 = one[10] + two[10];
      sum1 += three[11];                  sum2 += three[10];
      sum1 += four[11];                   sum2 += four[10];
      sum1 += five[11];                   sum2 += five[10];
      P->dest[11] = sum1;                 P->dest[10] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 10:
      sum1 = one[9] + two[9];             sum2 = one[8] + two[8];
      sum1 += three[9];                   sum2 += three[8];
      sum1 += four[9];                    sum2 += four[8];
      sum1 += five[9];                    sum2 += five[8];
      P->dest[9] = sum1;                  P->dest[8] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 8:
      sum1 = one[7] + two[7];             sum2 = one[6] + two[6];
      sum1 += three[7];                   sum2 += three[6];
      sum1 += four[7];                    sum2 += four[6];
      sum1 += five[7];                    sum2 += five[6];
      P->dest[7] = sum1;                  P->dest[6] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 6:
      sum1 = one[5] + two[5];             sum2 = one[4] + two[4];
      sum1 += three[5];                   sum2 += three[4];
      sum1 += four[5];                    sum2 += four[4];
      sum1 += five[5];                    sum2 += five[4];
      P->dest[5] = sum1;                  P->dest[4] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 4:
      sum1 = one[3] + two[3];             sum2 = one[2] + two[2];
      sum1 += three[3];                   sum2 += three[2];
      sum1 += four[3];                    sum2 += four[2];
      sum1 += five[3];                    sum2 += five[2];
      P->dest[3] = sum1;                  P->dest[2] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 2:
      sum1 = one[1] + two[1];             sum2 = one[0] + two[0];
      sum1 += three[1];                   sum2 += three[0];
      sum1 += four[1];                    sum2 += four[0];
      sum1 += five[1];                    sum2 += five[0];
      P->dest[1] = sum1;                  P->dest[0] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
  }
  if (tmax1 > tmax2) return tmax1;
  return tmax2;
}
//

// use for higher sum5 folds

//

float top_sum5(float *ss[], struct PoTPlan *P) {
  float        sum1, sum2, tmaxB, tmax;
  int i;
  float *one = ss[0];
  float *two = ss[0]+P->tmp0;
  float *three = ss[0]+P->tmp1;
  float *four = ss[0]+P->tmp2;
  float *five = ss[0]+P->tmp3;
  tmaxB = tmax = float(0.0);

  for (i = 0 ; i < P->di-15; i += 16) {
    sum1 = one[i+0] + two[i+0];         sum2 = one[i+1] + two[i+1];
    sum1 += three[i+0];                 sum2 += three[i+1];
    sum1 += four[i+0];                  sum2 += four[i+1];
    sum1 += five[i+0];                  sum2 += five[i+1];
    P->dest[i+0] = sum1;                P->dest[i+1] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+2] + two[i+2];         sum2 = one[i+3] + two[i+3];
    sum1 += three[i+2];                 sum2 += three[i+3];
    sum1 += four[i+2];                  sum2 += four[i+3];
    sum1 += five[i+2];                  sum2 += five[i+3];
    P->dest[i+2] = sum1;                P->dest[i+3] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+4] + two[i+4];         sum2 = one[i+5] + two[i+5];
    sum1 += three[i+4];                 sum2 += three[i+5];
    sum1 += four[i+4];                  sum2 += four[i+5];
    sum1 += five[i+4];                  sum2 += five[i+5];
    P->dest[i+4] = sum1;                P->dest[i+5] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+6] + two[i+6];         sum2 = one[i+7] + two[i+7];
    sum1 += three[i+6];                 sum2 += three[i+7];
    sum1 += four[i+6];                  sum2 += four[i+7];
    sum1 += five[i+6];                  sum2 += five[i+7];
    P->dest[i+6] = sum1;                P->dest[i+7] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+8] + two[i+8];         sum2 = one[i+9] + two[i+9];
    sum1 += three[i+8];                 sum2 += three[i+9];
    sum1 += four[i+8];                  sum2 += four[i+9];
    sum1 += five[i+8];                  sum2 += five[i+9];
    P->dest[i+8] = sum1;                P->dest[i+9] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+10] + two[i+10];       sum2 = one[i+11] + two[i+11];
    sum1 += three[i+10];                sum2 += three[i+11];
    sum1 += four[i+10];                 sum2 += four[i+11];
    sum1 += five[i+10];                 sum2 += five[i+11];
    P->dest[i+10] = sum1;               P->dest[i+11] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+12] + two[i+12];       sum2 = one[i+13] + two[i+13];
    sum1 += three[i+12];                sum2 += three[i+13];
    sum1 += four[i+12];                 sum2 += four[i+13];
    sum1 += five[i+12];                 sum2 += five[i+13];
    P->dest[i+12] = sum1;               P->dest[i+13] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+14] + two[i+14];       sum2 = one[i+15] + two[i+15];
    sum1 += three[i+14];                sum2 += three[i+15];
    sum1 += four[i+14];                 sum2 += four[i+15];
    sum1 += five[i+14];                 sum2 += five[i+15];
    P->dest[i+14] = sum1;               P->dest[i+15] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;
  }
  for (   ; i < P->di; i++) {
    sum1 = (one[i] + two[i] );          sum2 = (three[i] + four[i] );
    sum1 += five[i];
    sum1 += sum2;
    P->dest[i] = sum1;
    if (sum1 > tmax) tmax = sum1;
  }
  if (tmax > tmaxB) return tmax;
  return tmaxB;
}
//

//

sum_func swi5tb[FOLDTBLEN] = {
  sw_sum5_t31,  sw_sum5_t31,  sw_sum5_t31,  sw_sum5_t31,
  sw_sum5_t31,  sw_sum5_t31,  sw_sum5_t31,  sw_sum5_t31,
  sw_sum5_t31,  sw_sum5_t31,  sw_sum5_t31,  sw_sum5_t31,
  sw_sum5_t31,  sw_sum5_t31,  sw_sum5_t31,  sw_sum5_t31,
  sw_sum5_t31,  sw_sum5_t31,  sw_sum5_t31,  sw_sum5_t31,
  sw_sum5_t31,  sw_sum5_t31,  sw_sum5_t31,  sw_sum5_t31,
  sw_sum5_t31,  sw_sum5_t31,  sw_sum5_t31,  sw_sum5_t31,
  sw_sum5_t31,  sw_sum5_t31,  sw_sum5_t31,  top_sum5
};
//

//

float sw_sum2_t31(float *ss[], struct PoTPlan *P) {
  float sum1, sum2, tmax2, tmax1;
  int i = P->di;
  float *one   = ss[1]+P->offset;
  float *two   = ss[1]+P->tmp0;
  tmax2 = tmax1 = float(0.0);

  if ( i & 1 ) {
    i -= 1;
    tmax1 = one[i] + two[i];
    P->dest[i] = tmax1;
  }
  switch (i) {
    case 30:
      sum1 = one[29] + two[29];           sum2 = one[28] + two[28];
      P->dest[29] = sum1;                 P->dest[28] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 28:
      sum1 = one[27] + two[27];           sum2 = one[26] + two[26];
      P->dest[27] = sum1;                 P->dest[26] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 26:
      sum1 = one[25] + two[25];           sum2 = one[24] + two[24];
      P->dest[25] = sum1;                 P->dest[24] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 24:
      sum1 = one[23] + two[23];           sum2 = one[22] + two[22];
      P->dest[23] = sum1;                 P->dest[22] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 22:
      sum1 = one[21] + two[21];           sum2 = one[20] + two[20];
      P->dest[21] = sum1;                 P->dest[20] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 20:
      sum1 = one[19] + two[19];           sum2 = one[18] + two[18];
      P->dest[19] = sum1;                 P->dest[18] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 18:
      sum1 = one[17] + two[17];           sum2 = one[16] + two[16];
      P->dest[17] = sum1;                 P->dest[16] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 16:
      sum1 = one[15] + two[15];           sum2 = one[14] + two[14];
      P->dest[15] = sum1;                 P->dest[14] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 14:
      sum1 = one[13] + two[13];           sum2 = one[12] + two[12];
      P->dest[13] = sum1;                 P->dest[12] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 12:
      sum1 = one[11] + two[11];           sum2 = one[10] + two[10];
      P->dest[11] = sum1;                 P->dest[10] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 10:
      sum1 = one[9] + two[9];             sum2 = one[8] + two[8];
      P->dest[9] = sum1;                  P->dest[8] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 8:
      sum1 = one[7] + two[7];             sum2 = one[6] + two[6];
      P->dest[7] = sum1;                  P->dest[6] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 6:
      sum1 = one[5] + two[5];             sum2 = one[4] + two[4];
      P->dest[5] = sum1;                  P->dest[4] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 4:
      sum1 = one[3] + two[3];             sum2 = one[2] + two[2];
      P->dest[3] = sum1;                  P->dest[2] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
    case 2:
      sum1 = one[1] + two[1];             sum2 = one[0] + two[0];
      P->dest[1] = sum1;                  P->dest[0] = sum2;
      if (sum1 > tmax1) tmax1 = sum1;     if (sum2 > tmax2) tmax2 = sum2;
  }
  if (tmax1 > tmax2) return tmax1;
  return tmax2;
}
//

// use for higher sum2 folds

//

float top_sum2(float *ss[], struct PoTPlan *P) {
  float sum1, sum2, tmaxB, tmax;
  int   i;
  float *one = ss[1]+P->offset;
  float *two = ss[1]+P->tmp0;
  tmaxB = tmax = float(0.0);

  for (i = 0 ; i < P->di-21; i += 22) {
    sum1 = one[i+0] + two[i+0];         sum2 = one[i+1] + two[i+1];
    P->dest[i+0] = sum1;                P->dest[i+1] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+2] + two[i+2];         sum2 = one[i+3] + two[i+3];
    P->dest[i+2] = sum1;                P->dest[i+3] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+4] + two[i+4];         sum2 = one[i+5] + two[i+5];
    P->dest[i+4] = sum1;                P->dest[i+5] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+6] + two[i+6];         sum2 = one[i+7] + two[i+7];
    P->dest[i+6] = sum1;                P->dest[i+7] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+8] + two[i+8];         sum2 = one[i+9] + two[i+9];
    P->dest[i+8] = sum1;                P->dest[i+9] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+10] + two[i+10];       sum2 = one[i+11] + two[i+11];
    P->dest[i+10] = sum1;               P->dest[i+11] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+12] + two[i+12];       sum2 = one[i+13] + two[i+13];
    P->dest[i+12] = sum1;               P->dest[i+13] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+14] + two[i+14];       sum2 = one[i+15] + two[i+15];
    P->dest[i+14] = sum1;               P->dest[i+15] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+16] + two[i+16];       sum2 = one[i+17] + two[i+17];
    P->dest[i+16] = sum1;               P->dest[i+17] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+18] + two[i+18];       sum2 = one[i+19] + two[i+19];
    P->dest[i+18] = sum1;               P->dest[i+19] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;

    sum1 = one[i+20] + two[i+20];       sum2 = one[i+21] + two[i+21];
    P->dest[i+20] = sum1;               P->dest[i+21] = sum2;
    if (sum1 > tmax) tmax = sum1;       if (sum2 > tmaxB) tmaxB = sum2;
  }
  for (   ; i < P->di; i++) {
    sum1 = one[i] + two[i];
    P->dest[i] = sum1;
    if (sum1 > tmax) tmax = sum1;
  }
  if (tmax > tmaxB) return tmax;
  return tmaxB;
}

sum_func swi2tb[FOLDTBLEN] = {
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, top_sum2
};
//

//

FoldSet swifold = {swi3tb, swi4tb, swi5tb, swi2tb, swi2tb, "FPU opt"};

// Arrays from which folding subroutines will be chosen during execution.

// Other sets can be inserted to replace the defaults.

//


sum_func sumsel3[FOLDTBLEN] = {
  sw_sum3_t31, sw_sum3_t31, sw_sum3_t31, sw_sum3_t31,
  sw_sum3_t31, sw_sum3_t31, sw_sum3_t31, sw_sum3_t31,
  sw_sum3_t31, sw_sum3_t31, sw_sum3_t31, sw_sum3_t31,
  sw_sum3_t31, sw_sum3_t31, sw_sum3_t31, sw_sum3_t31,
  sw_sum3_t31, sw_sum3_t31, sw_sum3_t31, sw_sum3_t31,
  sw_sum3_t31, sw_sum3_t31, sw_sum3_t31, sw_sum3_t31,
  sw_sum3_t31, sw_sum3_t31, sw_sum3_t31, sw_sum3_t31,
  sw_sum3_t31, sw_sum3_t31, sw_sum3_t31, top_sum3
};
sum_func sumsel4[FOLDTBLEN] = {
  sw_sum4_t31, sw_sum4_t31, sw_sum4_t31, sw_sum4_t31,
  sw_sum4_t31, sw_sum4_t31, sw_sum4_t31, sw_sum4_t31,
  sw_sum4_t31, sw_sum4_t31, sw_sum4_t31, sw_sum4_t31,
  sw_sum4_t31, sw_sum4_t31, sw_sum4_t31, sw_sum4_t31,
  sw_sum4_t31, sw_sum4_t31, sw_sum4_t31, sw_sum4_t31,
  sw_sum4_t31, sw_sum4_t31, sw_sum4_t31, sw_sum4_t31,
  sw_sum4_t31, sw_sum4_t31, sw_sum4_t31, sw_sum4_t31,
  sw_sum4_t31, sw_sum4_t31, sw_sum4_t31, top_sum4
};
sum_func sumsel5[FOLDTBLEN] = {
  sw_sum5_t31, sw_sum5_t31, sw_sum5_t31, sw_sum5_t31,
  sw_sum5_t31, sw_sum5_t31, sw_sum5_t31, sw_sum5_t31,
  sw_sum5_t31, sw_sum5_t31, sw_sum5_t31, sw_sum5_t31,
  sw_sum5_t31, sw_sum5_t31, sw_sum5_t31, sw_sum5_t31,
  sw_sum5_t31, sw_sum5_t31, sw_sum5_t31, sw_sum5_t31,
  sw_sum5_t31, sw_sum5_t31, sw_sum5_t31, sw_sum5_t31,
  sw_sum5_t31, sw_sum5_t31, sw_sum5_t31, sw_sum5_t31,
  sw_sum5_t31, sw_sum5_t31, sw_sum5_t31, top_sum5
};
sum_func sumsel2[FOLDTBLEN] = {
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, top_sum2
};
sum_func sumsel2AL[FOLDTBLEN] = {
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, sw_sum2_t31,
  sw_sum2_t31, sw_sum2_t31, sw_sum2_t31, top_sum2
};

FoldSet Foldmain = {sumsel3, sumsel4, sumsel5, sumsel2, sumsel2AL, "opt FPU"};

/**********************
 *
 * Routine to copy fold subroutine tables.
 *
 */
int CopyFoldSet(FoldSet *dst, FoldSet *src) {
  int  i, j3, j4, j5, j2, j2AL;

  j3 = j4 = j5 = j2 = j2AL = 0;

  for (i = 0; i < FOLDTBLEN; i++) {

    if (src->f3[i] != 0)  {
      dst->f3[i] = src->f3[i];
      j3 = i;
    } else dst->f3[i] = src->f3[j3];

    if (src->f4[i] != 0)  {
      dst->f4[i] = src->f4[i];
      j4 = i;
    } else dst->f4[i] = src->f4[j4];

    if (src->f5[i] != 0)  {
      dst->f5[i] = src->f5[i];
      j5 = i;
    } else dst->f5[i] = src->f5[j5];

    if (src->f2[i] != 0)  {
      dst->f2[i] = src->f2[i];
      j2 = i;
    } else dst->f2[i] = src->f2[j2];

    if (src->f2AL[i] != 0)  {
      dst->f2AL[i] = src->f2AL[i];
      j2AL = i;
    } else dst->f2AL[i] = src->f2AL[j2AL];
  }
  dst->name = src->name;
  return 0;
}

/**********************
 *
 * t_funct - Caching routine for calls to invert_lcgf, return cache value if present
 *
 * This version caches and returns the threshold factor for dis_thresh rather than cur_thresh.
 *
 * The threshold factor assumes folding subroutines which do NOT divide the sums by num_adds.
 */
float t_funct(int m, int n, int x) {
 static struct tftab {
   int n;
   float y;
 }
 *t_funct_tab;

 float c_dis_thresh = (float)swi.analysis_cfg.pulse_display_thresh;

 if (!t_funct_tab) {
   int tablen = (PoTInfo.PulseMax+2)/3 +
                3*(int)(log((float)PoTInfo.PulseMax)/log(2.0)-3) - 1;
   t_funct_tab=(struct tftab *)malloc_a(tablen*sizeof(struct tftab),MEM_ALIGN);
   if (!t_funct_tab) SETIERROR(MALLOC_FAILED, "!t_funct_tab");
   memset(t_funct_tab, 0, tablen*sizeof(struct tftab));
 }
 if (t_funct_tab[x].n!=n) {
   t_funct_tab[x].n=n;
    t_funct_tab[x].y = (invert_lcgf((float)(-PoTInfo.PulseThresh - log((float)m)),
                                     (float)n, (float)1e-4) - n) * c_dis_thresh + n; 
 }
 return t_funct_tab[x].y;
}

/**********************
 *
 * Preplanning routine called from find_pulse()
 *
 */
int plan_PulsePoT(PoTPlan * PSeq, int PulsePotLen, float *div, int *dbinoffs) {
  float period;
  int ndivs;
  int i, j, di, dbins, offset;
  int num_adds_2;
  long p;
  unsigned long cperiod;
  int num_adds;
  register float tmp_max,t1;
  int res=1;
  int k = 0; // plan index


  for (i = 32, ndivs = 1; i <= PulsePotLen; ndivs++, i *= 2);

  int32_t thePotLen = PulsePotLen;

  //  Periods from PulsePotLen/3 to PulsePotLen/4, and power of 2 fractions of.

  //   then (len/4 to len/5) and finally (len/5 to len/6)

  //

  int32_t firstP, lastP;
  for(num_adds = 3; num_adds <= 5; num_adds++) {
    int num_adds_minus1;
    switch(num_adds) {
      case 3: lastP = (thePotLen*2)/3;  firstP = (thePotLen*1)/2; num_adds_minus1 = 2; break;
      case 4: lastP = (thePotLen*3)/4;  firstP = (thePotLen*3)/5; num_adds_minus1 = 3; break;
      case 5: lastP = (thePotLen*4)/5;  firstP = (thePotLen*4)/6; num_adds_minus1 = 4; break;
    }

    for (p = lastP ; p > firstP ; p--) {
      int tabofst = ndivs*3+2-num_adds;
      PSeq[k].dest = div+dbinoffs[0]; // Output storage

      PSeq[k].cperiod = cperiod = p*(C3X2TO14 / num_adds_minus1);
      PSeq[k].tmp0 = (int)((cperiod+C3X2TO13)/C3X2TO14);
      PSeq[k].tmp1 = (int)((cperiod*2+C3X2TO13)/C3X2TO14);
      PSeq[k].di = di  = (int)cperiod/C3X2TO14;
      PSeq[k].na = num_adds;
      PSeq[k].thresh = t_funct(di, num_adds, di+tabofst);

      switch(num_adds) {
        case 3:
          PSeq[k].fun_ptr = (di < FOLDTBLEN) ? sumsel3[di] : sumsel3[FOLDTBLEN-1];
          break;
        case 4:
          PSeq[k].tmp2 = (int)((cperiod*3+C3X2TO13)/C3X2TO14);
          PSeq[k].fun_ptr = (di < FOLDTBLEN) ? sumsel4[di] : sumsel4[FOLDTBLEN-1];
          break;
        case 5:
          PSeq[k].tmp2 = (int)((cperiod*3+C3X2TO13)/C3X2TO14);
          PSeq[k].tmp3 = (int)((cperiod*4+C3X2TO13)/C3X2TO14);
          PSeq[k].fun_ptr = (di < FOLDTBLEN) ? sumsel5[di] : sumsel5[FOLDTBLEN-1];
          break;
      }

      k++;                    // next plan

      num_adds_2 = 2* num_adds;

      for (j = 1; j < ndivs ; j++) {
        tabofst -= 3;
        PSeq[k].offset = dbinoffs[j-1];
        PSeq[k].dest = div+dbinoffs[j];
        PSeq[k].cperiod = PSeq[k-1].cperiod/2;
        PSeq[k].tmp0 = di & 1;
        PSeq[k].di = di = PSeq[k].cperiod/C3X2TO14;
        PSeq[k].tmp0 += di + PSeq[k].offset; //PSeq[k].offset + (PSeq[k].cperiod+C3X2TO13)/C3X2TO14

        if (PSeq[k].tmp0 & 3) PSeq[k].fun_ptr  = (di < FOLDTBLEN) ? sumsel2[di] : sumsel2[FOLDTBLEN-1];
        else PSeq[k].fun_ptr  = (di < FOLDTBLEN) ? sumsel2AL[di] : sumsel2AL[FOLDTBLEN-1];
        PSeq[k].na = num_adds_2;
        PSeq[k].thresh = t_funct(di, num_adds_2, di+tabofst);

        k++;                    // next plan

        num_adds_2 *=2;
      }  // for (j = 1; j < ndivs

    } // for (p = lastP

  } // for(num_adds =

  PSeq[k].di = 0; // stop

  return (k);
}

/**********************
 *
 * find_pulse - uses a folding algorithm to find repeating pulses
 *   Initially folds by prime number then by powers of two.
 *
 * Initial code by Eric Korpela
 * Major revisions and optimization by Ben Herndon
 * Further revised by Joe Segur
 *
 */
int find_pulse(const float * fp_PulsePot, int PulsePotLen,
               float pulse_thresh, int TOffset, int FOffset) {
  static float *div,*FoldedPOT;
  static int *dbinoffs, PrevPPL, PrevPoTln;
  static PoTPlan *PSeq;
  static float rcfg_dis_thresh = 1.0f / (float)swi.analysis_cfg.pulse_display_thresh;
  PoTPlan PTPln = {0} ;
  float *SrcSel[2];
  float period;
  int ndivs;
  int i, j, di, maxs = 0;
  int num_adds, num_adds_2;
  long p;
  float max=0,maxd=0,avg=0,maxp=0, snr=0, fthresh=0;
  register float tmp_max,t1;
  int res=1;

  // debug possible heap corruption -- jeffc

#ifdef _WIN32

  BOINCASSERT(_CrtCheckMemory());
#endif


#ifdef DEBUG_PULSE_VERBOSE

  fprintf(stderr, "In find_pulse()...   PulsePotLen = %d   pulse_thresh = %f   TOffset = %d   PoT = %d\n",  PulsePotLen,  pulse_thresh, TOffset, FOffset);
#endif


  // boinc_worker_timer();

  
  //  Create internal arrays....

  if (!div) {
    int n, maxdivs = 1;
    for (i = 32; i <= PoTInfo.PulseMax; maxdivs++, i *= 2);
    div=(float *)malloc_a(((PoTInfo.PulseMax*7/4)*sizeof(float))+(maxdivs*MEM_ALIGN), MEM_ALIGN);
    FoldedPOT=(float *)malloc_a((PoTInfo.PulseMax+1)*sizeof(float)/3, MEM_ALIGN);
    dbinoffs=(int *)malloc_a(maxdivs*sizeof(int), MEM_ALIGN);
    for (i = 32, n = 1; i <= PPLANMAX; n++, i *= 2);
    i =  ((PPLANMAX*2)/3)-((PPLANMAX*2)/4);
    i += ((PPLANMAX*3)/4)-((PPLANMAX*3)/5);
    i += ((PPLANMAX*4)/5)-((PPLANMAX*4)/6);
    i *= n;
    PSeq = (PoTPlan *)malloc_a((i+1)*sizeof(PoTPlan), MEM_ALIGN);
    if ((!div) || (!FoldedPOT) || (!dbinoffs) || (!PSeq))
      SETIERROR(MALLOC_FAILED, "(!div) || (!FoldedPOT) || (!dbinoffs) || (!PSeq)");
    if (t_funct(6,1000,0)<0) SETIERROR(MALLOC_FAILED, "t_funct(6,1000,0)<0");;
  }

  SrcSel[0] = (float *)fp_PulsePot;  // source of data for 3, 4, 5 folds

  SrcSel[1] = div;                   // source of data for 2 folds


/* Uncomment this block if res > 1 is ever actually used

  //  Rebin to lower resolution if required
  if (res <= 1) {
    memcpy(div,fp_PulsePot,PulsePotLen*sizeof(float));
  } else {
    for (j=0;j<PulsePotLen/res;j++) {
      div[j]=0;
      for (i=0;i<res;i++) {
        div[j]+=fp_PulsePot[j*res+i];
      }
      div[j]/=res;
    }
    PulsePotLen/=res;
    SrcSel[0] = div;
  }
*/

  //  Calculate average power

  for (i = 0; i < PulsePotLen; i++) {
    avg += *(SrcSel[0] + i);
  }
  avg/=PulsePotLen;

  for (i = 32, ndivs = 1; i <= PulsePotLen; ndivs++, i *= 2);
  // ndivs=(int)(log((float)PulsePotLen)/log(2.0)-3);


  int32_t thePotLen = PulsePotLen;

  if (PrevPPL != PulsePotLen ) {
    PrevPPL = PulsePotLen;
    dbinoffs[0] = (PulsePotLen + (MEM_ALIGN/sizeof(float))-1) & -(MEM_ALIGN/sizeof(float));
    period = (float)((int)((PulsePotLen*2)/3))/2;
    for (i = 1; i < ndivs; i++, period/=2) {
      dbinoffs[i] = (((int)(period+0.5f) + (MEM_ALIGN/sizeof(float))-1) & -(MEM_ALIGN/sizeof(float))) + dbinoffs[i-1];
    }
  }

  if (PulsePotLen <= PPLANMAX) {    // If short, work from plan.

    int k;
    float cur_thresh, dis_thresh, t1;

    if (PulsePotLen != PrevPoTln) {  // if new length, generate plan.

      plan_PulsePoT(PSeq, PulsePotLen, div, dbinoffs);
      PrevPoTln = PulsePotLen;
    }

    for ( k = 0; PSeq[k].di; k++) {
      dis_thresh = PSeq[k].thresh*avg;

      tmp_max = PSeq[k].fun_ptr(SrcSel, &PSeq[k]);

      if (tmp_max>dis_thresh) {
        // unscale for reporting

        tmp_max /= PSeq[k].na;
        cur_thresh = (dis_thresh / PSeq[k].na - avg) * rcfg_dis_thresh + avg;

        ReportPulseEvent(tmp_max/avg,avg,res*(float)PSeq[k].cperiod/(float)C3X2TO14,
                         TOffset+PulsePotLen/2,FOffset,
                         (tmp_max-avg)*(float)sqrt((float)PSeq[k].na)/avg,
                         (cur_thresh-avg)*(float)sqrt((float)PSeq[k].na)/avg,
                         PSeq[k].dest, PSeq[k].na, 0);

        if ((tmp_max>cur_thresh) && ((t1=tmp_max-cur_thresh)>maxd)) {
          maxp  = (float)PSeq[k].cperiod/(float)C3X2TO14;
          maxd  = t1;
          maxs  = PSeq[k].na;
          max = tmp_max;
          snr = (float)((tmp_max-avg)*sqrt((float)PSeq[k].na)/avg);
          fthresh=(float)((cur_thresh-avg)*sqrt((float)PSeq[k].na)/avg);
          memcpy(FoldedPOT, PSeq[k].dest, PSeq[k].di*sizeof(float));
        }
      }
    }  // for ( k =

  }
  else {                         // If not plan,


    //  Periods from PulsePotLen/3 to PulsePotLen/4, and power of 2 fractions of.

    //   then (len/4 to len/5) and finally (len/5 to len/6)

    //

    int32_t firstP, lastP;
    for(num_adds = 3; num_adds <= 5; num_adds++) {
      int num_adds_minus1;
      switch(num_adds) {
        case 3: lastP = (thePotLen*2)/3;  firstP = (thePotLen*1)/2; num_adds_minus1 = 2; break;
        case 4: lastP = (thePotLen*3)/4;  firstP = (thePotLen*3)/5; num_adds_minus1 = 3; break;
        case 5: lastP = (thePotLen*4)/5;  firstP = (thePotLen*4)/6; num_adds_minus1 = 4; break;
      }
  
      for (p = lastP ; p > firstP ; p--) {
        float cur_thresh, dis_thresh;
        int tabofst, mper, perdiv;

        tabofst = ndivs*3+2-num_adds;
        mper = p * (12/num_adds_minus1);
        perdiv = num_adds_minus1;
        PTPln.tmp0 = (int)((mper + 6)/12);             // round(period)

        PTPln.tmp1 = (int)((mper * 2 + 6)/12);         // round(period*2)

        PTPln.di = (int)p/perdiv;                      // (int)period

        PTPln.dest = div+dbinoffs[0]; // Output storage

        dis_thresh = t_funct(PTPln.di, num_adds, PTPln.di+tabofst)*avg;

        switch(num_adds) {
          case 3:
            tmp_max = (PTPln.di < FOLDTBLEN) ? sumsel3[PTPln.di](SrcSel, &PTPln) : sumsel3[FOLDTBLEN-1](SrcSel, &PTPln);
            break;
          case 4:
            PTPln.tmp2 = (int)((mper * 3 + 6)/12);     // round(period*3)

            tmp_max = (PTPln.di < FOLDTBLEN) ? sumsel4[PTPln.di](SrcSel, &PTPln) : sumsel4[FOLDTBLEN-1](SrcSel, &PTPln);
            break;
          case 5:
            PTPln.tmp2 = (int)((mper * 3 + 6)/12);     // round(period*3)

            PTPln.tmp3 = (int)((mper * 4 + 6)/12);     // round(period*4)

            tmp_max = (PTPln.di < FOLDTBLEN) ? sumsel5[PTPln.di](SrcSel, &PTPln) : sumsel5[FOLDTBLEN-1](SrcSel, &PTPln);
            break;
        }
  
        if (tmp_max>dis_thresh) {
          // unscale for reporting

          tmp_max /= num_adds;
          cur_thresh = (dis_thresh / num_adds - avg) * rcfg_dis_thresh + avg;

          ReportPulseEvent(tmp_max/avg,avg,((float)p)/(float)perdiv*res,
                           TOffset+PulsePotLen/2,FOffset,
                           (tmp_max-avg)*(float)sqrt((float)num_adds)/avg,
                           (cur_thresh-avg)*(float)sqrt((float)num_adds)/avg,
                           PTPln.dest, num_adds, 0);

          if ((tmp_max>cur_thresh) && ((t1=tmp_max-cur_thresh)>maxd)) {
            maxp  = (float)p/(float)perdiv;
            maxd  = t1;
            maxs  = num_adds;
            max = tmp_max;
            snr = (float)((tmp_max-avg)*sqrt((float)num_adds)/avg);
            fthresh=(float)((cur_thresh-avg)*sqrt((float)num_adds)/avg);
            memcpy(FoldedPOT, PTPln.dest, PTPln.di*sizeof(float));
          }
        }

        num_adds_2 = 2* num_adds;

        for (j = 1; j < ndivs ; j++) {
          PTPln.offset = dbinoffs[j-1];
          PTPln.dest = div+dbinoffs[j];
          perdiv *=2;
          PTPln.tmp0 = PTPln.di & 1;
          PTPln.di /= 2;
          PTPln.tmp0 += PTPln.di + PTPln.offset;
          tabofst -=3;
          dis_thresh = t_funct(PTPln.di, num_adds_2, PTPln.di+tabofst) * avg;

          if (PTPln.tmp0 & 3)
            tmp_max = (PTPln.di < FOLDTBLEN) ? sumsel2[PTPln.di](SrcSel, &PTPln) : sumsel2[FOLDTBLEN-1](SrcSel, &PTPln);
          else
            tmp_max = (PTPln.di < FOLDTBLEN) ? sumsel2AL[PTPln.di](SrcSel, &PTPln) : sumsel2AL[FOLDTBLEN-1](SrcSel, &PTPln);

          if (tmp_max>dis_thresh) {
            // unscale for reporting

            tmp_max /= num_adds_2;
            cur_thresh = (dis_thresh / num_adds_2 - avg) * rcfg_dis_thresh + avg;

            ReportPulseEvent(tmp_max/avg,avg,((float)p)/(float)perdiv*res,
                             TOffset+PulsePotLen/2,FOffset,
                             (tmp_max-avg)*(float)sqrt((float)num_adds_2)/avg,
                             (cur_thresh-avg)*(float)sqrt((float)num_adds_2)/avg,
                             PTPln.dest, num_adds_2, 0);

            if ((tmp_max>cur_thresh) && ((t1=tmp_max-cur_thresh)>maxd)) {
              maxp = (float)p/(float)perdiv;
              maxd = t1;
              maxs = num_adds_2;
              max  = tmp_max;
              snr  = (float)((tmp_max-avg)*sqrt((float)num_adds_2)/avg);
              fthresh = (float)((cur_thresh-avg)*sqrt((float)num_adds_2)/avg);
              memcpy(FoldedPOT, PTPln.dest, PTPln.di*sizeof(float));
            }
          }

          num_adds_2 *=2;
        }  // for (j = 1; j < ndivs

      } // for (p = lastP

    } // for(num_adds =

  }

  analysis_state.FLOP_counter+=(PulsePotLen*0.1818181818182+400.0)*PulsePotLen;
  if (maxp!=0)
    ReportPulseEvent(max/avg,avg,maxp*res,TOffset+PulsePotLen/2,FOffset,
                     snr, fthresh, FoldedPOT, maxs, 1);

  // debug possible heap corruption -- jeffc

#ifdef _WIN32

  BOINCASSERT(_CrtCheckMemory());
#endif


  return 0;
}



syntax highlighted by Code2HTML, v. 0.9.1