/* Copyright 2004, 2005 Nicholas Bishop * * This file is part of SharpConstruct. * * SharpConstruct is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * SharpConstruct is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with SharpConstruct; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef OPTIMIZED_H #define OPTIMIZED_H #include "Align.hh" #include #include namespace SharpConstruct { namespace Optimized { class Point3D { public: inline Point3D() : data_( _mm_setzero_ps() ) {} inline Point3D( const __m128& in ) : data_( in ) {} inline Point3D( const float x, const float y, const float z ) : data_( _mm_set_ps( 0, z, y, x ) ) {} void CalculatePlaneNormal( const __m128 p1, const __m128 p2, const __m128 p3 ); void CalculatePlaneUnormal( const __m128 p1, const __m128 p2, const __m128 p3 ); inline float Distance( const Point3D& in ) { register __m128 p1 = data_, shuff, dis; p1 = _mm_sub_ps( p1, in.data_ ); p1 = _mm_mul_ps( p1, p1 ); shuff = _mm_shuffle_ps( p1, p1, _MM_SHUFFLE( 1, 2, 3, 0 ) ); dis = shuff; shuff = _mm_shuffle_ps( p1, p1, _MM_SHUFFLE( 2, 3, 0, 1 ) ); dis = _mm_add_ss( dis, shuff ); shuff = _mm_shuffle_ps( p1, p1, _MM_SHUFFLE( 3, 0, 1, 2 ) ); dis = _mm_add_ss( dis, shuff ); dis = _mm_rsqrt_ss( dis ); dis = _mm_rcp_ss( dis ); // Is there a faster way to do this? //return ((float*)&dis)[0]; // Yes: _mm_store_ss float d; _mm_store_ss( &d, dis ); return d; } // Not a true distance! void FastDistance( const __m128 in, float& d ); void Midpoint( const Point3D& a, const Point3D& b ); inline void CopyTo( float* loc ) { _mm_storeu_ps( loc, data_ ); } inline void CopyToAligned( float* loc ) { _mm_store_ps( loc, data_ ); } inline float HorizAdd() const { float a = 0; register __m128 t = data_; t = _mm_add_ps( t, _mm_shuffle_ps( t, t, _MM_SHUFFLE( 2, 1, 0, 3 ) ) ); t = _mm_add_ps( t, _mm_shuffle_ps( t, t, _MM_SHUFFLE( 2, 2, 2, 2 ) ) ); _mm_store_ss( &a, t ); return a; } inline void Zero() { data_ = _mm_setzero_ps(); } inline Point3D& Abs() { register __m128 v( _mm_set_ps1( -0.0 ) ); data_ = _mm_andnot_ps( v, data_ ); return *this; } inline float& X() { return ( ( float* )&data_ )[ 0 ]; } inline float& Y() { return ( ( float* )&data_ )[ 1 ]; } inline float& Z() { return ( ( float* )&data_ )[ 2 ]; } inline float& W() { return ( ( float* )&data_ )[ 3 ]; } inline const float& X() const { return ( ( float* )&data_ )[ 0 ]; } inline const float& Y() const { return ( ( float* )&data_ )[ 1 ]; } inline const float& Z() const { return ( ( float* )&data_ )[ 2 ]; } inline operator __m128() const { return data_; } /*inline bool operator==( const Point3D& p ) const { return X() == p.X() && Y() == p.Y() && Z() == p.Z(); }*/ inline Point3D operator+( const Point3D& in ) const { return Point3D( _mm_add_ps( data_, in.data_ ) ); } inline Point3D operator-( const Point3D& in ) const { return Point3D( _mm_sub_ps( data_, in.data_ ) ); } inline Point3D operator*( const Point3D& in ) const { return Point3D( _mm_mul_ps( data_, in.data_ ) ); } inline Point3D operator*( const float in ) const { return Point3D( X() * in, Y() * in, Z() * in ); } inline Point3D operator/( const float in ) const { return Point3D( X() / in, Y() / in, Z() / in ); } inline void operator+=( const Point3D& in ) { data_ = _mm_add_ps( data_, in.data_ ); } inline void operator-=( const Point3D& in ) { data_ = _mm_sub_ps( data_, in.data_ ); } inline void operator*=( const Point3D& in ) { data_ = _mm_mul_ps( data_, in.data_ ); } inline void operator/=( const float in ) { register __m128 d = data_, div = _mm_set1_ps( in ); d = _mm_div_ps( d, div ); data_ = d; } inline const __m128& Data() const { return data_; } inline const __m128* RawData() const { return &data_; } inline __m128* RawData() { return &data_; } private: __m128 data_; }; void Normalize( Point3D& ); typedef Point3D Normal3D; typedef std::vector< Point3D > Point3DVector; void Normalize( Point3DVector& ); /*class Point3DVector { public: inline Point3DVector() : _data( 0 ), _size( 0 ), _real_size( 0 ) {} inline Point3DVector( const Point3DVector& in ) : _data( 0 ), _size( 0 ), _real_size( 0 ) { resize( in._size ); for( int i = 0; i < _size; ++i ) _data[ i ] = in._data[ i ]; } inline ~Point3DVector() { clear(); } void NormalizeAll(); inline unsigned size() const { return _size; } inline void clear() { free( _data ); _data = 0; _size = 0; _real_size = 0; } inline void resize( int size ) { if( size > _real_size ) { Point3DVector tmp; if( size > 0 ) tmp = *this; clear(); _real_size = static_cast< int >( pow( 2, round( log( size ) / log( 2 ) ) ) ); if( _real_size < size ) _real_size *= 2; void* mem; posix_memalign( &mem, __alignof( __m128 ), _real_size * sizeof( __m128 ) ); _data = ( __m128* )mem; for( int i = 0; i < _real_size; ++i ) new( &_data[ i ] ) __m128; if( size > 0 ) { for( int i = 0; i < tmp._size; ++i ) _data[ i ] = tmp._data[ i ]; } } _size = size; } inline void push_back( const Point3D& in ) { resize( _size + 1 ); _data[ _size - 1 ] = in; } inline Point3DProxy operator[]( int i ) { //_proxy.Set( &_data[ i ] ); //return _proxy; return Point3DProxy( &_data[ i ] ); } inline Point3DVector& operator=( const Point3DVector& in ) { resize( in._size ); for( int i = 0; i < _size; ++i ) _data[ i ] = in._data[ i ]; return ( *this ); } inline __m128* RawData() const { return _data; } private: __m128* _data; int _size; int _real_size; };*/ } } #endif // OPTIMIZED_H