/* Copyright 2005 Nicholas Bishop
 *
 * This file is part of SharpConstruct.
 *
 * SharpConstruct is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * SharpConstruct is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with SharpConstruct; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "Optimized.h"

// NOTE(review): this file listed three angle-bracket includes whose header
// names are no longer recoverable. <xmmintrin.h> is what the _mm_* intrinsics
// below require; confirm the remaining two against the project history.
#include <xmmintrin.h>

using namespace SharpConstruct::Optimized;

namespace
{
    // Adds the four lanes of 'v' together. Only lane 0 of the result holds
    // the complete sum; the other lanes hold partial sums.
    inline __m128 SumLanesToLow( __m128 v )
    {
        // Lane i += lane (i - 1), wrapping around.
        v = _mm_add_ps( v, _mm_shuffle_ps( v, v, _MM_SHUFFLE( 2, 1, 0, 3 ) ) );
        // Every lane += lane 2, which completes the sum in lane 0.
        return _mm_add_ps( v, _mm_shuffle_ps( v, v, _MM_SHUFFLE( 2, 2, 2, 2 ) ) );
    }

    // Cross product of the edges ( p1 - p2 ) and ( p3 - p1 ) in lanes 0..2.
    // Lane 3 ends up as a[3]*b[1] - a[1]*b[3], a by-product of the shuffles;
    // it is zero whenever the fourth components of the inputs are equal.
    inline __m128 EdgeCross( const __m128 p1, const __m128 p2, const __m128 p3 )
    {
        const __m128 a = _mm_sub_ps( p1, p2 );
        const __m128 b = _mm_sub_ps( p3, p1 );

        // a in Y,Z,X order times b in Z,X,Y order
        const __m128 lhs = _mm_mul_ps( _mm_shuffle_ps( a, a, _MM_SHUFFLE( 3, 0, 2, 1 ) ),
                                       _mm_shuffle_ps( b, b, _MM_SHUFFLE( 1, 1, 0, 2 ) ) );
        // a in Z,X,Y order times b in Y,Z,X order
        const __m128 rhs = _mm_mul_ps( _mm_shuffle_ps( a, a, _MM_SHUFFLE( 1, 1, 0, 2 ) ),
                                       _mm_shuffle_ps( b, b, _MM_SHUFFLE( 3, 0, 2, 1 ) ) );

        return _mm_sub_ps( lhs, rhs );
    }
}

// Stores in 'd' the sum of the absolute per-lane differences between this
// point's data and 'in'. All four lanes take part in the sum.
void Point3D::FastDistance( const __m128 in, float& d )
{
    // Only the sign bit is set in each lane of this mask.
    const __m128 signMask( _mm_set_ps1( -0.0f ) );

    __m128 t( data_ );
    t = _mm_sub_ps( t, in );
    t = _mm_andnot_ps( signMask, t ); // Clear the sign bits: absolute value
    t = SumLanesToLow( t );           // Answer is in [0]
    _mm_store_ss( &d, t );
}

// Sets this point to the average of 'a' and 'b'.
void Point3D::Midpoint( const Point3D& a, const Point3D& b )
{
    *this = ( a + b ) / 2;
}

// Sets this point to the cross product of ( p1 - p2 ) and ( p3 - p1 ), scaled
// by an approximate reciprocal of its length (_mm_rsqrt_ss).
//
// NOTE(review): the squared length sums all four lanes, so the result is only
// a normalized 3D vector if lane 3 of the cross product is zero, i.e. the
// inputs share the same fourth component. Confirm against Point3D's layout.
void Point3D::CalculatePlaneNormal( const __m128 p1, const __m128 p2, const __m128 p3 )
{
    const __m128 n = EdgeCross( p1, p2, p3 );

    // And normalize 'n':
    __m128 scale = SumLanesToLow( _mm_mul_ps( n, n ) );
    scale = _mm_rsqrt_ss( scale );
    scale = _mm_shuffle_ps( scale, scale, _MM_SHUFFLE( 0, 0, 0, 0 ) );

    data_ = _mm_mul_ps( n, scale );
}

// Sets this point to the cross product of ( p1 - p2 ) and ( p3 - p1 ) without
// normalizing it.
void Point3D::CalculatePlaneUnormal( const __m128 p1, const __m128 p2, const __m128 p3 )
{
    data_ = EdgeCross( p1, p2, p3 );
}

// Scales 'p' by an approximate reciprocal of its length. The length is taken
// from lanes 0..2 only; lane 3 is scaled by the same factor but does not
// contribute to the length.
void SharpConstruct::Optimized::Normalize( Point3D& p )
{
    // Square all the components
    __m128 squared( p );
    squared = _mm_mul_ps( squared, squared );

    // Put X + Y + Z into lane 0 of 'length' by rotating each squared
    // component into lane 0 in turn and accumulating with scalar adds.
    __m128 length = _mm_shuffle_ps( squared, squared, _MM_SHUFFLE( 1, 2, 3, 0 ) );
    length = _mm_add_ss( length,
                         _mm_shuffle_ps( squared, squared, _MM_SHUFFLE( 2, 3, 0, 1 ) ) );
    length = _mm_add_ss( length,
                         _mm_shuffle_ps( squared, squared, _MM_SHUFFLE( 3, 0, 1, 2 ) ) );

    // Take the reciprocal square root
    length = _mm_rsqrt_ss( length );

    // Move the rsqrt into all four positions
    length = _mm_shuffle_ps( length, length, _MM_SHUFFLE( 0, 0, 0, 0 ) );

    // Multiply it by data
    p = _mm_mul_ps( *p.RawData(), length );
}

// Normalizes every element of 'v' in place.
void SharpConstruct::Optimized::Normalize( Point3DVector& v )
{
    const unsigned size( v.size() );
    for( unsigned i = 0; i < size; ++i )
        Normalize( v[ i ] );
}

/*void Point3DVector::NormalizeAll()
{
    for( unsigned i = 0; i < size(); i++ )
    {
        register __m128 sq( data_[ i ] );

        // Square each component
        sq = _mm_mul_ps( sq, sq );

        // Horizontal add
        sq = _mm_add_ps( sq, _mm_shuffle_ps( sq, sq, _MM_SHUFFLE( 2, 1, 0, 3 ) ) );
        sq = _mm_add_ps( sq, _mm_shuffle_ps( sq, sq, _MM_SHUFFLE( 2, 2, 2, 2 ) ) );

        sq = _mm_rsqrt_ss( sq );
        sq = _mm_shuffle_ps( sq, sq, _MM_SHUFFLE( 0, 0, 0, 0 ) );

        data_[ i ] = _mm_mul_ps( data_[ i ], sq );
    }
}*/