/*
* transcendentals.h
*
* A collection of inline AltiVec math functions.
*
* Written in 2003 by Leo Fink (leofink@mac.com).
*
**/
inline vector float rsqrt(const vector float& v)
{
const vector float _0 = (vector float)(-.0);
// obtain estimate
vector float y = vec_rsqrte(v);
// one round of Newton-Raphson
y = vec_madd(vec_nmsub(v,vec_madd(y,y,_0),(vector float)(1.0)),vec_madd(y,(vector float)(0.5),_0),y);
return y;
}
inline vector float sqrt(const vector float& v)
{
const vector float _0 = (vector float)(-.0);
return vec_sel(vec_madd(v,rsqrt(v),_0),_0,vec_cmpeq(v,_0));
}
inline vector float log2(const vector float& x) // |error| < 2.3e-6
{
const vector unsigned int SHIFT = (vector unsigned int)(23);
const vector unsigned int BIAS = (vector unsigned int)(127);
// extract exponent and mantissa. 1 <= m < 2
const vector float e = vec_ctf((vector signed int)vec_sub(vec_sr(vec_and((vector unsigned int)(x),(vector unsigned int)(0x7f800000)),SHIFT),BIAS),0);
vector float m = (vector float)(vec_or(vec_and((vector unsigned int)(x),(vector unsigned int)(0x007FFFFF)),vec_sl(BIAS,SHIFT)));
// approximate log2(m) by a chebyshev polynomial
m = vec_sub(m,(vector float)(1.));
vector float y = vec_madd(m,(vector float)(-.0258411662),(vector float)(.1217970128));
y = vec_madd(m,y,(vector float)(-.2779042655));
y = vec_madd(m,y,(vector float)(.4575485901));
y = vec_madd(m,y,(vector float)(-.7181451002));
y = vec_madd(m,y,(vector float)(1.4425449290));
return vec_madd(m,y,e); // log2(x) = log2(m)+e
}
inline vector float log(const vector float& x)
{ // ln(x) = log2(x)/ln(2)
return vec_madd(log2(x),(vector float)(.69314718056),(vector float)(-.0));
}
inline vector float exp2(const vector float& x) // |error|/exp2(x) < 3.4e-6
{
// extract integral and fractional part. x=n+r, 0 <= r < 1
const vector float n = vec_max(vec_floor(x),(vector float)(-127));
const vector float r = vec_sub(x,n);
// approximate 2^r by a chebyshev polynomial
vector float y = vec_madd(r,(vector float)(.0135557571),(vector float)(.0520323499));
y = vec_madd(r,y,(vector float)(.2413797743));
y = vec_madd(r,y,(vector float)(.6930321187));
y = vec_madd(r,y,(vector float)(1.0));
// calculate power of integral part by bit-shifting
const vector float zhn = (vector float)(vec_sl(vec_add((vector signed int)(127),vec_cts(n,0)),(vector unsigned int)(23)));
return vec_madd(y,zhn,(vector float)(-.0)); // 2^x = 2^r*2^n;
}
inline vector float exp(const vector float& x)
{ // exp(x) = 2^(x/ln(2))
return exp2(vec_madd(x,(vector float)(1.442695041),(vector float)(-.0)));
}
inline vector float pow(const vector float x,const vector float p)
{ // x^p = 2^(p*log2(x))
return exp2(vec_madd(p,log2(x),(vector float)(-.0)));
}
inline vector float cos(const vector float& x) // |error| < 5e-7
{
const vector float _0 = (vector float)(-.0);
const vector float H = (vector float)(.5);
// reduce argument to [0,1[
vector float r = vec_madd(x,(vector float)(.1591549431),_0);
r = vec_abs(vec_sub(vec_sub(r,vec_floor(r)),H));
r = vec_nmsub(r,(vector float)(2),H);
// save sign bit
vector unsigned int s = vec_andc((vector unsigned int)(0x80000000),(vector unsigned int)(r));
// reduce argument to [0,1/2[
r = vec_sub(H,vec_abs(r));
// approximate cos(r*pi) by a chebyshev polynomial
r = vec_madd(r,r,_0);
vector float y = vec_madd(r,(vector float)(-.2580631862e-1),(vector float)(.2353306303));
y = vec_madd(y,r,(vector float)(-1.335262769));
y = vec_madd(y,r,(vector float)(4.058712126));
y = vec_madd(y,r,(vector float)(-4.934802201));
y = vec_madd(y,r,(vector float)(1.));
// restore sign
return (vector float)(vec_xor((vector unsigned int)(y),s));
}
inline vector float sin(const vector float& x) // |error| < 5e-7
{
const vector float _0 = (vector float)(-.0);
const vector float H = (vector float)(.5);
// reduce argument to [0,1[
vector float r = vec_madd(x,(vector float)(.1591549431),_0);
r = vec_sub(vec_sub(r,vec_floor(r)),H);
// save sign bit
vector unsigned int s = vec_andc((vector unsigned int)(0x80000000),(vector unsigned int)(r));
// reduce argument to [0,1/2[
r = vec_sub(H,vec_abs(vec_nmsub(vec_abs(r),(vector float)(2),H)));
// approximate sin(r*pi) by a chebyshev polynomial
const vector float r_ = r;
r = vec_madd(r,r,_0);
vector float y = vec_madd(r,(vector float)(-.7370292530e-2),(vector float)(.8214588660e-1));
y = vec_madd(y,r,(vector float)(-.5992645293));
y = vec_madd(y,r,(vector float)(2.550164040));
y = vec_madd(y,r,(vector float)(-5.167712780));
y = vec_madd(y,r,(vector float)(3.141592654));
y = vec_madd(y,r_,_0);
// restore sign
return (vector float)(vec_xor((vector unsigned int)(y),s));
}
syntax highlighted by Code2HTML, v. 0.9.1