/*
* Copyright (c) 2003, 2006 Matteo Frigo
* Copyright (c) 2003, 2006 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Sun Jul 2 14:19:38 EDT 2006 */
#include "codelet-rdft.h"
#ifdef HAVE_FMA
/* Generated by: ../../../genfft/gen_r2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 16 -name r2hc_16 -include r2hc.h */
/*
* This function contains 58 FP additions, 20 FP multiplications,
* (or, 38 additions, 0 multiplications, 20 fused multiply/add),
* 38 stack variables, and 32 memory accesses
*/
/*
* Generator Id's :
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
* $Id: gen_r2hc.ml,v 1.18 2006-02-12 23:34:12 athena Exp $
*/
#include "r2hc.h"
/*
 * r2hc_16 (HAVE_FMA variant): fully unrolled size-16 real-input DFT kernel.
 *
 * Each loop iteration reads the 16 real samples I[WS(is, n)], n = 0..15,
 * and writes ro[WS(ros, k)] for k = 0..8 and io[WS(ios, k)] for k = 1..7.
 * Expanding the expressions below gives
 *     ro[k] =  sum_n I[n] * cos(2*pi*k*n/16)
 *     io[k] = -sum_n I[n] * sin(2*pi*k*n/16)
 * i.e. the real and imaginary parts of the non-redundant half of the DFT.
 * io[0] and io[8] are never written by this function.
 *
 * Parameters:
 *   I             input samples (read only)
 *   ro, io        output arrays for the real / imaginary parts
 *   is, ros, ios  strides handed to WS() when indexing I, ro and io
 *   v             number of transforms to perform (loop trip count)
 *   ivs           amount added to I after each transform
 *   ovs           amount added to both ro and io after each transform
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS, FMS and
 * MAKE_VOLATILE_STRIDE are declared outside this block (presumably by
 * codelet-rdft.h / r2hc.h).  The comments below assume
 *     FMA(a, b, c)  = a*b + c
 *     FNMS(a, b, c) = c - a*b
 *     FMS(a, b, c)  = a*b - c
 * which is what makes this variant agree with the non-FMA variant in the
 * #else branch of this file -- confirm against the header.
 *
 * The statement order and block nesting were chosen by the generator
 * (see the "Generated by" comment: -schedule-for-pipeline, -variables 4);
 * do not reorder by hand.
 */
static void r2hc_16(const R *I, R *ro, R *io, stride is, stride ros, stride ios, INT v, INT ivs, INT ovs)
{
/* Numerically cos(pi/8). */
DK(KP923879532, +0.923879532511286756128183189396788286822416626);
/* Numerically sqrt(2)/2 = cos(pi/4) = sin(pi/4). */
DK(KP707106781, +0.707106781186547524400844362104849039284835938);
/* Numerically sqrt(2) - 1 = tan(pi/8).  Since cos(pi/8)*tan(pi/8) = sin(pi/8),
 * KP923879532 * (x +/- KP414213562 * y) equals cos(pi/8)*x +/- sin(pi/8)*y. */
DK(KP414213562, +0.414213562373095048801688724209698078569671875);
INT i;
/* One transform per iteration; the pointers advance in the increment clause. */
for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(ros), MAKE_VOLATILE_STRIDE(ios)) {
/* TQ and TP live at loop scope because they are consumed after the
 * nested blocks below have closed (io[5] and io[3]). */
E TQ, TP;
{
E TB, TN, Tf, T7, Te, Tv, TO, TE, Tq, TJ, Tp, TI, TT, Ty, Tm;
E Tr, TK, Ts;
{
E TC, Ta, Td, TD;
{
E T1, T2, T4, T5;
/* Inputs 0, 8, 4, 12. */
T1 = I[0];
T2 = I[WS(is, 8)];
T4 = I[WS(is, 4)];
T5 = I[WS(is, 12)];
{
E T8, T3, T6, T9, Tb, Tc;
T8 = I[WS(is, 2)];
/* Sum/difference pairs of inputs half a period apart. */
TB = T1 - T2;
T3 = T1 + T2;
TN = T4 - T5;
T6 = T4 + T5;
T9 = I[WS(is, 10)];
Tb = I[WS(is, 14)];
Tc = I[WS(is, 6)];
/* Tf = (I0+I8) - (I4+I12); T7 = I0+I8+I4+I12. */
Tf = T3 - T6;
T7 = T3 + T6;
/* Same pairing for inputs 2/10 and 14/6. */
TC = T8 - T9;
Ta = T8 + T9;
Td = Tb + Tc;
TD = Tb - Tc;
}
}
{
E TG, Ti, Tj, Tk, Tg, Th;
Tg = I[WS(is, 1)];
Th = I[WS(is, 9)];
/* Te = I2+I10+I14+I6, so T7 +/- Te combine all even-indexed inputs. */
Te = Ta + Td;
Tv = Td - Ta;
TO = TD - TC;
TE = TC + TD;
/* Odd-indexed inputs: pair 1/9 here, 5/13, 15/7 and 3/11 below. */
TG = Tg - Th;
Ti = Tg + Th;
Tj = I[WS(is, 5)];
Tk = I[WS(is, 13)];
{
E Tn, To, TH, Tl;
Tn = I[WS(is, 15)];
To = I[WS(is, 7)];
Tq = I[WS(is, 3)];
TH = Tj - Tk;
Tl = Tj + Tk;
TJ = Tn - To;
Tp = Tn + To;
/* Rotations by pi/8 with the cos(pi/8) factor deferred to the output
 * stage: TI = TG - tan(pi/8)*TH, TT = tan(pi/8)*TG + TH. */
TI = FNMS(KP414213562, TH, TG);
TT = FMA(KP414213562, TG, TH);
Ty = Ti + Tl;
Tm = Ti - Tl;
Tr = I[WS(is, 11)];
}
}
}
/* ro[4] = (I0+I4+I8+I12) - (I2+I6+I10+I14). */
ro[WS(ros, 4)] = T7 - Te;
/* Note the operand order: TK = I11 - I3. */
TK = Tr - Tq;
Ts = Tq + Tr;
{
E Tx, TV, TF, TS, Tz, Tt, TM, TL;
/* Tx = sum of all even-indexed inputs. */
Tx = T7 + Te;
/* TV/TF = TB -/+ sqrt(2)/2 * TE. */
TV = FNMS(KP707106781, TE, TB);
TF = FMA(KP707106781, TE, TB);
/* Deferred-scale rotations for the 15/7 and 3/11 pairs. */
TL = FNMS(KP414213562, TK, TJ);
TS = FMA(KP414213562, TJ, TK);
Tz = Tp + Ts;
Tt = Tp - Ts;
TM = TI + TL;
TQ = TL - TI;
{
E TR, TU, TW, TA, Tw, Tu;
/* TP/TR = TN +/- sqrt(2)/2 * TO. */
TP = FMA(KP707106781, TO, TN);
TR = FNMS(KP707106781, TO, TN);
/* TA = sum of all odd-indexed inputs. */
TA = Ty + Tz;
io[WS(ios, 4)] = Tz - Ty;
Tw = Tt - Tm;
Tu = Tm + Tt;
/* The multiplications by KP923879532 apply the deferred cos(pi/8) scale. */
ro[WS(ros, 1)] = FMA(KP923879532, TM, TF);
ro[WS(ros, 7)] = FNMS(KP923879532, TM, TF);
/* ro[0] = sum of all inputs; ro[8] = even-indexed sum minus odd-indexed sum. */
ro[0] = Tx + TA;
ro[WS(ros, 8)] = Tx - TA;
io[WS(ios, 6)] = FMS(KP707106781, Tw, Tv);
io[WS(ios, 2)] = FMA(KP707106781, Tw, Tv);
ro[WS(ros, 2)] = FMA(KP707106781, Tu, Tf);
ro[WS(ros, 6)] = FNMS(KP707106781, Tu, Tf);
TU = TS - TT;
TW = TT + TS;
io[WS(ios, 7)] = FMA(KP923879532, TU, TR);
io[WS(ios, 1)] = FMS(KP923879532, TU, TR);
ro[WS(ros, 3)] = FMA(KP923879532, TW, TV);
ro[WS(ros, 5)] = FNMS(KP923879532, TW, TV);
}
}
}
/* Last two outputs, built from the loop-scope temporaries TQ and TP. */
io[WS(ios, 5)] = FMS(KP923879532, TQ, TP);
io[WS(ios, 3)] = FMA(KP923879532, TQ, TP);
}
}
/*
 * Descriptor handed to X(kr2hc_register) together with r2hc_16.
 * The initializer is positional: 16 and "r2hc_16" match the generator
 * options -n 16 -name r2hc_16, and the first three entries of
 * {38, 0, 20, 0} match the operation counts in the generated comment for
 * this variant (38 additions, 0 multiplications, 20 fused multiply/adds).
 * NOTE(review): the meaning of the fourth count, of GENUS and of the five
 * trailing zeros is not visible in this file -- see the kr2hc_desc
 * definition.
 */
static const kr2hc_desc desc = { 16, "r2hc_16", {38, 0, 20, 0}, &GENUS, 0, 0, 0, 0, 0 };
/*
 * Registration entry point: passes the r2hc_16 kernel and its descriptor
 * to X(kr2hc_register) for planner p.
 * NOTE(review): X() is a macro defined elsewhere; presumably it decorates
 * the identifier with a library prefix -- confirm in the project headers.
 */
void X(codelet_r2hc_16) (planner *p) {
X(kr2hc_register) (p, r2hc_16, &desc);
}
#else /* HAVE_FMA */
/* Generated by: ../../../genfft/gen_r2hc -compact -variables 4 -pipeline-latency 4 -n 16 -name r2hc_16 -include r2hc.h */
/*
* This function contains 58 FP additions, 12 FP multiplications,
* (or, 54 additions, 8 multiplications, 4 fused multiply/add),
* 34 stack variables, and 32 memory accesses
*/
/*
* Generator Id's :
* $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
* $Id: gen_r2hc.ml,v 1.18 2006-02-12 23:34:12 athena Exp $
*/
#include "r2hc.h"
/*
 * r2hc_16 (non-FMA variant): fully unrolled size-16 real-input DFT kernel.
 *
 * Each loop iteration reads the 16 real samples I[WS(is, n)], n = 0..15,
 * and writes ro[WS(ros, k)] for k = 0..8 and io[WS(ios, k)] for k = 1..7.
 * Expanding the expressions below gives
 *     ro[k] =  sum_n I[n] * cos(2*pi*k*n/16)
 *     io[k] = -sum_n I[n] * sin(2*pi*k*n/16)
 * i.e. the real and imaginary parts of the non-redundant half of the DFT.
 * io[0] and io[8] are never written by this function.
 *
 * Parameters:
 *   I             input samples (read only)
 *   ro, io        output arrays for the real / imaginary parts
 *   is, ros, ios  strides handed to WS() when indexing I, ro and io
 *   v             number of transforms to perform (loop trip count)
 *   ivs           amount added to I after each transform
 *   ovs           amount added to both ro and io after each transform
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are declared outside this block (presumably by
 * codelet-rdft.h / r2hc.h).  The comments below assume
 *     FMA(a, b, c)  = a*b + c
 *     FNMS(a, b, c) = c - a*b
 * -- confirm against the header.
 *
 * The code is generator output (see the "Generated by" comment); do not
 * reorder by hand.
 */
static void r2hc_16(const R *I, R *ro, R *io, stride is, stride ros, stride ios, INT v, INT ivs, INT ovs)
{
/* Numerically cos(pi/8). */
DK(KP923879532, +0.923879532511286756128183189396788286822416626);
/* Numerically sin(pi/8). */
DK(KP382683432, +0.382683432365089771728459984030398866761344562);
/* Numerically sqrt(2)/2 = cos(pi/4) = sin(pi/4). */
DK(KP707106781, +0.707106781186547524400844362104849039284835938);
INT i;
/* One transform per iteration; the pointers advance in the increment clause. */
for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(ros), MAKE_VOLATILE_STRIDE(ios)) {
E T3, T6, T7, Tz, Ti, Ta, Td, Te, TA, Th, Tq, TV, TF, TP, Tx;
E TU, TE, TM, Tg, Tf, TJ, TQ;
{
/* Inputs 0, 8, 4, 12: sums and differences of samples half a period apart. */
E T1, T2, T4, T5;
T1 = I[0];
T2 = I[WS(is, 8)];
T3 = T1 + T2;
T4 = I[WS(is, 4)];
T5 = I[WS(is, 12)];
T6 = T4 + T5;
/* T7 = I0+I8+I4+I12. */
T7 = T3 + T6;
Tz = T1 - T2;
Ti = T4 - T5;
}
{
/* Inputs 2, 10, 14, 6. */
E T8, T9, Tb, Tc;
T8 = I[WS(is, 2)];
T9 = I[WS(is, 10)];
Ta = T8 + T9;
Tg = T8 - T9;
Tb = I[WS(is, 14)];
Tc = I[WS(is, 6)];
Td = Tb + Tc;
Tf = Tb - Tc;
}
/* Te = I2+I10+I14+I6, so T7 +/- Te combine all even-indexed inputs. */
Te = Ta + Td;
/* sqrt(2)/2 times the sum / difference of the two difference terms above. */
TA = KP707106781 * (Tg + Tf);
Th = KP707106781 * (Tf - Tg);
{
/* Odd-indexed inputs 15, 7, 3, 11. */
E Tm, TN, Tp, TO;
{
E Tk, Tl, Tn, To;
Tk = I[WS(is, 15)];
Tl = I[WS(is, 7)];
Tm = Tk - Tl;
TN = Tk + Tl;
Tn = I[WS(is, 3)];
To = I[WS(is, 11)];
Tp = Tn - To;
TO = Tn + To;
}
/* Tq = sin(pi/8)*Tm - cos(pi/8)*Tp;  TF = cos(pi/8)*Tm + sin(pi/8)*Tp. */
Tq = FNMS(KP923879532, Tp, KP382683432 * Tm);
TV = TN + TO;
TF = FMA(KP923879532, Tm, KP382683432 * Tp);
TP = TN - TO;
}
{
/* Odd-indexed inputs 1, 9, 5, 13. */
E Tt, TK, Tw, TL;
{
E Tr, Ts, Tu, Tv;
Tr = I[WS(is, 1)];
Ts = I[WS(is, 9)];
Tt = Tr - Ts;
TK = Tr + Ts;
Tu = I[WS(is, 5)];
Tv = I[WS(is, 13)];
Tw = Tu - Tv;
TL = Tu + Tv;
}
/* Tx = sin(pi/8)*Tt + cos(pi/8)*Tw;  TE = cos(pi/8)*Tt - sin(pi/8)*Tw. */
Tx = FMA(KP382683432, Tt, KP923879532 * Tw);
TU = TK + TL;
TE = FNMS(KP382683432, Tw, KP923879532 * Tt);
TM = TK - TL;
}
/* ro[4] = (I0+I4+I8+I12) - (I2+I6+I10+I14). */
ro[WS(ros, 4)] = T7 - Te;
/* io[4] = (I3+I7+I11+I15) - (I1+I5+I9+I13). */
io[WS(ios, 4)] = TV - TU;
{
/* Outputs io[1], io[7], ro[7], ro[1]. */
E Tj, Ty, TD, TG;
Tj = Th - Ti;
Ty = Tq - Tx;
io[WS(ios, 1)] = Tj + Ty;
io[WS(ios, 7)] = Ty - Tj;
TD = Tz + TA;
TG = TE + TF;
ro[WS(ros, 7)] = TD - TG;
ro[WS(ros, 1)] = TD + TG;
}
{
/* Outputs ro[5], ro[3], io[3], io[5]. */
E TB, TC, TH, TI;
TB = Tz - TA;
TC = Tx + Tq;
ro[WS(ros, 5)] = TB - TC;
ro[WS(ros, 3)] = TB + TC;
TH = Ti + Th;
TI = TF - TE;
io[WS(ios, 3)] = TH + TI;
io[WS(ios, 5)] = TI - TH;
}
/* Outputs ro[6], ro[2]. */
TJ = T3 - T6;
TQ = KP707106781 * (TM + TP);
ro[WS(ros, 6)] = TJ - TQ;
ro[WS(ros, 2)] = TJ + TQ;
{
/* Outputs io[2], io[6], ro[8], ro[0]. */
E TR, TS, TT, TW;
TR = Td - Ta;
TS = KP707106781 * (TP - TM);
io[WS(ios, 2)] = TR + TS;
io[WS(ios, 6)] = TS - TR;
/* TT = sum of even-indexed inputs, TW = sum of odd-indexed inputs. */
TT = T7 + Te;
TW = TU + TV;
ro[WS(ros, 8)] = TT - TW;
ro[0] = TT + TW;
}
}
}
/*
 * Descriptor handed to X(kr2hc_register) together with r2hc_16.
 * The initializer is positional: 16 and "r2hc_16" match the generator
 * options -n 16 -name r2hc_16, and the first three entries of
 * {54, 8, 4, 0} match the operation counts in the generated comment for
 * this variant (54 additions, 8 multiplications, 4 fused multiply/adds).
 * NOTE(review): the meaning of the fourth count, of GENUS and of the five
 * trailing zeros is not visible in this file -- see the kr2hc_desc
 * definition.
 */
static const kr2hc_desc desc = { 16, "r2hc_16", {54, 8, 4, 0}, &GENUS, 0, 0, 0, 0, 0 };
/*
 * Registration entry point: passes the r2hc_16 kernel and its descriptor
 * to X(kr2hc_register) for planner p.
 * NOTE(review): X() is a macro defined elsewhere; presumably it decorates
 * the identifier with a library prefix -- confirm in the project headers.
 */
void X(codelet_r2hc_16) (planner *p) {
X(kr2hc_register) (p, r2hc_16, &desc);
}
#endif /* HAVE_FMA */
/* syntax highlighted by Code2HTML, v. 0.9.1 */