/*
Copyright (C) 2003 Rice1964

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/

#include "stdafx.h"

extern FiddledVtx * g_pVtxBase;

#define ENABLE_CLIP_TRI

// Bit values stored in g_clipFlag[] / g_clipFlag2[] by RSP_Vtx_Clipping().
#define X_CLIP_MAX  0x1
#define X_CLIP_MIN  0x2
#define Y_CLIP_MAX  0x4
#define Y_CLIP_MIN  0x8
#define Z_CLIP_MAX  0x10
#define Z_CLIP_MIN  0x20

#ifdef ENABLE_CLIP_TRI

#define index(a,b)  (((a)<<2)+(b))

// Computes the clip flags of vertex i from its projected position.
//
// Both g_clipFlag[i] and g_clipFlag2[i] are reset to 0 first. If the vertex has
// w > 0, g_clipFlag2[i] receives the X/Y_CLIP_* bits for every projected
// coordinate lying outside [-1, 1]. With the "reject vertex" path commented
// out, g_clipFlag[i] is only ever cleared here, and the Z tests are disabled.
inline void RSP_Vtx_Clipping(int i)
{
    g_clipFlag[i] = 0;
    g_clipFlag2[i] = 0;
    if( g_vecProjected[i].w > 0 )
    {
        /*
        if( gRSP.bRejectVtx )
        {
            if( g_vecProjected[i].x > 1 )
            {
                g_clipFlag2[i] |= X_CLIP_MAX;
                if( g_vecProjected[i].x > gRSP.real_clip_ratio_posx )
                    g_clipFlag[i] |= X_CLIP_MAX;
            }

            if( g_vecProjected[i].x < -1 )
            {
                g_clipFlag2[i] |= X_CLIP_MIN;
                if( g_vecProjected[i].x < gRSP.real_clip_ratio_negx )
                    g_clipFlag[i] |= X_CLIP_MIN;
            }

            if( g_vecProjected[i].y > 1 )
            {
                g_clipFlag2[i] |= Y_CLIP_MAX;
                if( g_vecProjected[i].y > gRSP.real_clip_ratio_posy )
                    g_clipFlag[i] |= Y_CLIP_MAX;
            }

            if( g_vecProjected[i].y < -1 )
            {
                g_clipFlag2[i] |= Y_CLIP_MIN;
                if( g_vecProjected[i].y < gRSP.real_clip_ratio_negy )
                    g_clipFlag[i] |= Y_CLIP_MIN;
            }

            //if( g_vecProjected[i].z > 1.0f )
            //{
            //  g_clipFlag2[i] |= Z_CLIP_MAX;
            //  g_clipFlag[i] |= Z_CLIP_MAX;
            //}

            //if( gRSP.bNearClip && g_vecProjected[i].z < -1.0f )
            //{
            //  g_clipFlag2[i] |= Z_CLIP_MIN;
            //  g_clipFlag[i] |= Z_CLIP_MIN;
            //}
        }
        else
        */
        {
            if( g_vecProjected[i].x > 1 )   g_clipFlag2[i] |= X_CLIP_MAX;
            if( g_vecProjected[i].x < -1 )  g_clipFlag2[i] |= X_CLIP_MIN;
            if( g_vecProjected[i].y > 1 )   g_clipFlag2[i] |= Y_CLIP_MAX;
            if( g_vecProjected[i].y < -1 )  g_clipFlag2[i] |= Y_CLIP_MIN;
            //if( g_vecProjected[i].z > 1.0f )  g_clipFlag2[i] |= Z_CLIP_MAX;
            //if( gRSP.bNearClip && g_vecProjected[i].z < -1.0f )   g_clipFlag2[i] |= Z_CLIP_MIN;
        }
    }
}

#else

// Clipping support compiled out: nothing to compute.
inline void RSP_Vtx_Clipping(int i) {}

#endif

/*
 *  Global variables
 */
RSP_Options gRSP;
RDP_Options gRDP;

// Scratch normal shared by the normal-transform and lighting routines.
// 16-byte aligned because the SSE routines access it with movaps.
__declspec(align(16)) D3DXVECTOR4 g_normal;
static int norms[3];

// Per-vertex working arrays, indexed by vertex number.
__declspec(align(16)) D3DXVECTOR4 g_vtxNonTransformed[MAX_VERTS];   // input positions as floats
__declspec(align(16)) D3DXVECTOR4 g_vecProjected[MAX_VERTS];        // positions after the divide by w
__declspec(align(16)) D3DXVECTOR4 g_vtxTransformed[MAX_VERTS];      // positions after the matrix transform
float       g_vtxProjected5[1000][5];
float       g_fFogCoord[MAX_VERTS];
BYTE        g_FogBytes[MAX_VERTS];
DWORD       g_dwVecFlags[MAX_VERTS];            // Z_POS Z_NEG etc
VECTOR2     g_vecTexture[MAX_VERTS];
VECTOR2     g_vecTextureMapped[MAX_VERTS];
DWORD       g_dwVecDiffuseCol[MAX_VERTS];
DWORD       g_clipFlag[MAX_VERTS];              // written by RSP_Vtx_Clipping()
DWORD       g_clipFlag2[MAX_VERTS];             // written by RSP_Vtx_Clipping()
DaedalusRenderTexture g_textures[MAX_TEXTURES];

TLITVERTEX      g_ucVertexBuffer[1000];
unsigned int    g_vtxIndex[1000];
unsigned int    g_triVtxIndexes[1000];
unsigned int    g_minIndex, g_maxIndex;
bool            g_triVtxInitFlags[1000];
BYTE            g_oglVtxColors[1000][4];

float           gRSPfFogMin;
float           gRSPfFogMax;
float           gRSPfFogDivider;

DWORD           gRSPnumLights;
DaedalusLight   gRSPlights[16];
__declspec(align(16)) DaedalusMatrix    gRSPworldProjectTransported;
__declspec(align(16)) DaedalusMatrix    gRSPworldProject;
N64Light        gRSPn64lights[16];
__declspec(align(16)) DaedalusMatrix    gRSPmodelViewTop;
__declspec(align(16)) DaedalusMatrix    gRSPmodelViewTopTranspose;
__declspec(align(16)) DaedalusMatrix    dkrMatrixTransposed;

// Vertex loader currently in use; NULL until one is assigned.
void (*SetNewVertexInfo)(DWORD dwAddress, DWORD dwV0, DWORD dwNum)=NULL;

/*
 *
 */

/*n.x = (g_normal.x * matWorld.m00) + (g_normal.y * matWorld.m10) + (g_normal.z * matWorld.m20);
n.y = (g_normal.x * matWorld.m01) + (g_normal.y * matWorld.m11) + (g_normal.z * matWorld.m21);
n.z = 
(g_normal.x * matWorld.m02) + (g_normal.y * matWorld.m12) + (g_normal.z * matWorld.m22);*/ // Multiply (x,y,z,0) by matrix m, then normalize #ifdef _WIN32 #define Vec3TransformNormal(vec, m) __asm \ { \ __asm fld dword ptr [vec + 0] \ __asm fmul dword ptr [m + 0] /* x m00*/ \ __asm fld dword ptr [vec + 0] \ __asm fmul dword ptr [m + 4] /* x m01 x m00*/ \ __asm fld dword ptr [vec + 0] \ __asm fmul dword ptr [m + 8] /* x m02 x m01 x m00*/ \ \ __asm fld dword ptr [vec + 4] \ __asm fmul dword ptr [m + 16] /* y m10 x m02 x m01 x m00*/ \ __asm fld dword ptr [vec + 4] \ __asm fmul dword ptr [m + 20] /* y m11 y m10 x m02 x m01 x m00*/ \ __asm fld dword ptr [vec + 4] \ __asm fmul dword ptr [m + 24] /* y m12 y m11 y m10 x m02 x m01 x m00*/ \ \ __asm fxch st(2) /* y m10 y m11 y m12 x m02 x m01 x m00*/ \ __asm faddp st(5), st(0) /* y m11 y m12 x m02 x m01 (x m00 + y m10)*/ \ __asm faddp st(3), st(0) /* y m12 x m02 (x m01 + ym11) (x m00 + y m10)*/ \ __asm faddp st(1), st(0) /* (x m02 + y m12) (x m01 + ym11) (x m00 + y m10)*/ \ \ __asm fld dword ptr [vec + 8] \ __asm fmul dword ptr [m + 32] /* z m20 (x m02 + y m12) (x m01 + ym11) (x m00 + y m10)*/ \ __asm fld dword ptr [vec + 8] \ __asm fmul dword ptr [m + 36] /* z m21 z m20 (x m02 + y m12) (x m01 + ym11) (x m00 + y m10)*/ \ __asm fld dword ptr [vec + 8] \ __asm fmul dword ptr [m + 40] /* z m22 z m21 z m20 (x m02 + y m12) (x m01 + ym11) (x m00 + y m10)*/ \ \ __asm fxch st(2) /* z m20 z m21 z m22 (x m02 + y m12) (x m01 + ym11) (x m00 + y m10)*/ \ __asm faddp st(5), st(0) /* z m21 z m22 (x m02 + y m12) (x m01 + ym11) (x m00 + y m10 + z m20)*/ \ __asm faddp st(3), st(0) /* z m22 (x m02 + y m12) (x m01 + ym11 + z m21) (x m00 + y m10 + z m20)*/ \ __asm faddp st(1), st(0) /* (x m02 + y m12 + z m 22) (x m01 + ym11 + z m21) (x m00 + y m10 + z m20)*/ \ \ __asm fxch st(2) /* (x m00 + y m10 + z m20) (x m01 + ym11 + z m21) (x m02 + y m12 + z m 22) */ \ \ __asm fld1 /* 1 x y z */ \ __asm fld st(1) /* x 1 x y z */ \ __asm fmul st(0),st(0) /* 
xx 1 x y z */ \ __asm fld st(3) /* y xx 1 x y z */ \ __asm fmul st(0),st(0) /* yy xx 1 x y z */ \ __asm fld st(5) /* z yy xx 1 x y z */ \ __asm fmul st(0),st(0) /* zz yy xx 1 x y z */ \ \ __asm fxch st(2) /* xx yy zz 1 x y z */ \ \ __asm faddp st(1),st(0) /* (xx+yy) zz 1 x y z */ \ __asm faddp st(1),st(0) /* (xx+yy+zz) 1 x y z */ \ \ __asm fsqrt /* l 1 x y z */ \ \ __asm fdivp st(1),st(0) /* (1/l) x y z */ \ \ __asm fmul st(3),st(0) /* f x y fz */ \ __asm fmul st(2),st(0) /* f x fy fz */ \ __asm fmulp st(1),st(0) /* fx fy fz */ \ \ __asm fstp dword ptr [vec + 0] /* fy fz*/ \ __asm fstp dword ptr [vec + 4] /* fz */ \ __asm fstp dword ptr [vec + 8] /* done */ \ } \ #else // _WIN32 /*#define Vec3TransformNormal(vec, m) asm( \ "fld (,%%eax) \n" \ "fmul (,%%edx) \n" \ "fld (,%%eax) \n" \ "fmul 4(,%%edx) \n" \ "fld (,%%eax) \n" \ "fmul 8(,%%edx) \n" \ \ "fld 4(,%%eax) \n" \ "fmul 16(,%%edx) \n" \ "fld 4(,%%eax) \n" \ "fmul 20(,%%edx) \n" \ "fld 4(,%%eax) \n" \ "fmul 24(,%%edx) \n" \ \ "fxch %%st(2) \n" \ "faddp %%st(0), %%st(5) \n" \ "faddp %%st(0), %%st(3) \n" \ "faddp %%st(0), %%st(1) \n" \ \ "fld 8(,%%eax) \n" \ "fmul 32(,%%edx) \n" \ "fld 8(,%%eax) \n" \ "fmul 36(,%%edx) \n" \ "fld 8(,%%eax) \n" \ "fmul 40(,%%edx) \n" \ \ "fxch %%st(2) \n" \ "faddp %%st(0), %%st(5) \n" \ "faddp %%st(0), %%st(3) \n" \ "faddp %%st(0), %%st(1) \n" \ \ "fxch %%st(2) \n" \ \ "fld1 \n" \ "fld %%st(1) \n" \ "fmul %%st(0), %%st(0) \n" \ "fld %%st(3) \n" \ "fmul %%st(0),%%st(0) \n" \ "fld %%st(5) \n" \ "fmul %%st(0),%%st(0) \n" \ \ "fxch %%st(2) \n" \ \ "faddp %%st(0), %%st(1) \n" \ "faddp %%st(0), %%st(1) \n" \ \ "fsqrt \n" \ \ "fdivp %%st(0), %%st(1) \n" \ \ "fmul %%st(0), %%st(3) \n" \ "fmul %%st(0), %%st(2) \n" \ "fmulp %%st(0), %%st(1) \n" \ \ "fstp (,%%eax) \n" \ "fstp 4(,%%eax) \n" \ "fstp 8(,%%eax) \n" \ : \ : "a"(&vec), "d"(&m) \ : "memory", "%st(1)", "%st(2)", "%st(3)", "%st(4)", "%st(5)" \ );*/ #define Vec3TransformNormal(vec, m) \ D3DVECTOR temp; \ temp.x = (vec.x * m._11) + 
(vec.y * m._21) + (vec.z * m._31); \ temp.y = (vec.x * m._12) + (vec.y * m._22) + (vec.z * m._32); \ temp.z = (vec.x * m._13) + (vec.y * m._23) + (vec.z * m._33); \ float norm = sqrt(temp.x*temp.x+temp.y*temp.y+temp.z*temp.z); \ vec.x = temp.x/norm; vec.y = temp.y/norm; vec.z = temp.z/norm; #endif // _WIN32 // Multiply (x,y,z,1) by matrix m and project onto w = 1 #define Vec3TransformCoord(vecout, vec, m) __asm \ { \ __asm fld dword ptr [vec + 0] \ __asm fmul dword ptr [m + 0] /* x m00*/ \ __asm fld dword ptr [vec + 0] \ __asm fmul dword ptr [m + 4] /* x m01 x m00*/ \ __asm fld dword ptr [vec + 0] \ __asm fmul dword ptr [m + 8] /* x m02 x m01 x m00*/ \ \ __asm fld dword ptr [vec + 4] \ __asm fmul dword ptr [m + 16] /* y m10 x m02 x m01 x m00*/ \ __asm fld dword ptr [vec + 4] \ __asm fmul dword ptr [m + 20] /* y m11 y m10 x m02 x m01 x m00*/ \ __asm fld dword ptr [vec + 4] \ __asm fmul dword ptr [m + 24] /* y m12 y m11 y m10 x m02 x m01 x m00*/ \ \ __asm fxch st(2) /* y m10 y m11 y m12 x m02 x m01 x m00*/ \ __asm faddp st(5), st(0) /* y m11 y m12 x m02 x m01 (x m00 + y m10)*/ \ __asm faddp st(3), st(0) /* y m12 x m02 (x m01 + ym11) (x m00 + y m10)*/ \ __asm faddp st(1), st(0) /* (x m02 + y m12) (x m01 + ym11) (x m00 + y m10)*/ \ \ __asm fld dword ptr [vec + 8] \ __asm fmul dword ptr [m + 32] /* z m20 (x m02 + y m12) (x m01 + ym11) (x m00 + y m10)*/ \ __asm fld dword ptr [vec + 8] \ __asm fmul dword ptr [m + 36] /* z m21 z m20 (x m02 + y m12) (x m01 + ym11) (x m00 + y m10)*/ \ __asm fld dword ptr [vec + 8] \ __asm fmul dword ptr [m + 40] /* z m22 z m21 z m20 (x m02 + y m12) (x m01 + ym11) (x m00 + y m10)*/ \ \ __asm fxch st(2) /* z m20 z m21 z m22 (x m02 + y m12) (x m01 + ym11) (x m00 + y m10)*/ \ __asm faddp st(5), st(0) /* z m21 z m22 (x m02 + y m12) (x m01 + ym11) (x m00 + y m10 + z m20)*/ \ __asm faddp st(3), st(0) /* z m22 (x m02 + y m12) (x m01 + ym11 + z m21) (x m00 + y m10 + z m20)*/ \ __asm faddp st(1), st(0) /* (x m02 + y m12 + z m 22) (x m01 + ym11 + z 
m21) (x m00 + y m10 + z m20)*/ \ \ __asm fxch st(2) /* (x m00 + y m10 + z m20) (x m01 + ym11 + z m21) (x m02 + y m12 + z m 22) */ \ __asm fadd dword ptr [m + 48] /* (xm00+ym10+zm20+m30) (xm01+ym11+zm21) (xm02+ym12+zm22) */ \ __asm fxch st(1) /* (xm01+ym11+zm21) (xm00+ym10+zm20+m30) (xm02+ym12+zm22) */ \ __asm fadd dword ptr [m + 52] /* (xm01+ym11+zm21+m31) (xm00+ym10+zm20+m30) (xm02+ym12+zm22) */ \ __asm fxch st(2) /* (xm02+ym12+zm22) (xm00+ym10+zm20+m30)(xm01+ym11+zm21+m31) */ \ __asm fadd dword ptr [m + 56] /* (xm02+ym12+zm22+m32) (xm00+ym10+zm20+m30)(xm01+ym11+zm21+m31) */ \ __asm fxch st(1) /* (xm00+ym10+zm20+m30)(xm02+ym12+zm22+m32) (xm01+ym11+zm21+m31) */ \ \ __asm fld1 /* 1 abc */ \ __asm fld dword ptr [vec + 0] /* x 1 abc*/ \ __asm fmul dword ptr [m + 12] /* xm03 1 abc*/ \ __asm fld dword ptr [vec + 4] /* y xm03 1 abc*/ \ __asm fmul dword ptr [m + 28] /* ym13 xm03 1 abc*/ \ __asm fld dword ptr [vec + 8] /* z ym13 xm03 1 abc*/ \ __asm fmul dword ptr [m + 44] /* zm23 ym13 xm03 1 abc*/ \ \ __asm fxch st(2) /* xm03 ym13 zm23 1 abc*/ \ __asm faddp st(1), st(0) /* (xm03 + ym13) zm23 1 abc*/ \ __asm faddp st(1), st(0) /* (xm03 + ym13 + zm23) 1 abc*/ \ \ __asm fadd dword ptr [m + 60] /* (xm03+ym13+zm23+m33) 1 abc*/ \ \ __asm fdivp st(1), st(0) /* 1.0 / (xm03+ym13+zm23+m33) abc*/ \ \ __asm fmul st(3),st(0) /* f a b fc */ \ __asm fmul st(2),st(0) /* f a fb fc */ \ __asm fmulp st(1),st(0) /* fx fz fy */ \ \ __asm fstp dword ptr [vecout + 0] /* (xm02+ym12+zm22+m32) (xm01+ym11+zm21+m31)*/ \ __asm fstp dword ptr [vecout + 8] /* (xm01+ym11+zm21+m31) */ \ __asm fstp dword ptr [vecout + 4] /* done */ \ } \ #ifdef USING_INT_MATRIX void IntVec3Transform(D3DXVECTOR4 &vec4, FiddledVtx &vtx, N64IntMatrix &mtx) { int x = vtx.x*mtx.nums[0]+vtx.y*mtx.nums[4]+vtx.z*mtx.nums[8]+mtx.nums[12]; int y = vtx.x*mtx.nums[1]+vtx.y*mtx.nums[5]+vtx.z*mtx.nums[9]+mtx.nums[13]; int z = vtx.x*mtx.nums[2]+vtx.y*mtx.nums[6]+vtx.z*mtx.nums[10]+mtx.nums[14]; int w = 
vtx.x*mtx.nums[3]+vtx.y*mtx.nums[7]+vtx.z*mtx.nums[11]+mtx.nums[15]; vec4.x = x + (vtx.x*(int)mtx.decs[0]+vtx.y*(int)mtx.decs[4]+vtx.z*(int)mtx.decs[8]+(int)mtx.decs[12])/65536.0f; vec4.y = y + (vtx.x*(int)mtx.decs[1]+vtx.y*(int)mtx.decs[5]+vtx.z*(int)mtx.decs[9]+(int)mtx.decs[13])/65536.0f; vec4.z = z + (vtx.x*(int)mtx.decs[2]+vtx.y*(int)mtx.decs[6]+vtx.z*(int)mtx.decs[10]+(int)mtx.decs[14])/65536.0f; vec4.w = w + (vtx.x*(int)mtx.decs[3]+vtx.y*(int)mtx.decs[7]+vtx.z*(int)mtx.decs[11]+(int)mtx.decs[15])/65536.0f; } void IntVec3TransformNormal(int norms[3], NormalStruct &norm, N64IntMatrix &mtx) { int x = norm.nx*mtx.nums[0]+norm.ny*mtx.nums[4]+norm.nz*mtx.nums[8]; int y = norm.nx*mtx.nums[1]+norm.ny*mtx.nums[5]+norm.nz*mtx.nums[9]; int z = norm.nx*mtx.nums[2]+norm.ny*mtx.nums[6]+norm.nz*mtx.nums[10]; norms[0] = x + ((norm.nx*(int)mtx.decs[0]+norm.ny*(int)mtx.decs[4]+norm.nz*(int)mtx.decs[8])>>16); norms[1] = y + ((norm.nx*(int)mtx.decs[1]+norm.ny*(int)mtx.decs[5]+norm.nz*(int)mtx.decs[9])>>16); norms[2] = z + ((norm.nx*(int)mtx.decs[2]+norm.ny*(int)mtx.decs[6]+norm.nz*(int)mtx.decs[10])>>16); } #endif #ifdef _WIN32 __declspec( naked ) void __fastcall SSEVec3Transform(int i) { //SSEVec3Transform(g_vtxTransformed[i], g_vtxNonTransformed[i]); __asm { shl ecx,4; // ecx = i movaps xmm1, DWORD PTR g_vtxNonTransformed [ecx]; // xmm1 as original vector movaps xmm4, DWORD PTR gRSPworldProjectTransported; // row1 movaps xmm5, DWORD PTR gRSPworldProjectTransported[0x10]; // row2 movaps xmm6, DWORD PTR gRSPworldProjectTransported[0x20]; // row3 movaps xmm7, DWORD PTR gRSPworldProjectTransported[0x30]; // row4 mulps xmm4, xmm1; // row 1 mulps xmm5, xmm1; // row 2 mulps xmm6, xmm1; // row 3 mulps xmm7, xmm1; // row 4 movhlps xmm0, xmm4; // xmm4 high to xmm0 low movlhps xmm0, xmm5; // xmm5 low to xmm0 high addps xmm4, xmm0; // result of add are in xmm4 low addps xmm5, xmm0; // result of add are in xmm5 high shufps xmm0, xmm4, 0x44; // move xmm4 low DWORDs to xmm0 high shufps 
xmm4, xmm5, 0xe4; // move xmm5 high DWORS to xmm4 movhlps xmm5, xmm0; // xmm4, xmm5 are mirrored shufps xmm4, xmm4, 0x08; // move xmm4's 3rd DWORD to its 2nd DWORD shufps xmm5, xmm5, 0x0d; // move xmm5's 4th DWORD to its 2nd DWORD, // and move its 2nd DWORD to its 1st DWORD addps xmm4, xmm5; // results are in 1st and 2nd DWORD movhlps xmm0, xmm6; // xmm6 high to xmm0 low movlhps xmm0, xmm7; // xmm7 low to xmm0 high addps xmm6, xmm0; // result of add are in xmm6 low addps xmm7, xmm0; // result of add are in xmm7 high shufps xmm0, xmm6, 0x44; // move xmm6 low DWORDs to xmm0 high shufps xmm6, xmm7, 0xe4; // move xmm7 high DWORS to xmm6 movhlps xmm7, xmm0; // xmm6, xmm7 are mirrored shufps xmm6, xmm6, 0x08; // move xmm6's 3rd DWORD to its 2nd DWORD shufps xmm7, xmm7, 0x0d; // move xmm7's 4th DWORD to its 2nd DWORD, // and move its 2nd DWORD to its 1st DWORD addps xmm6, xmm7; // results are in 1st and 2nd DWORD movlhps xmm4, xmm6; // final result is in xmm4 movaps DWORD PTR g_vtxTransformed [ecx], xmm4; movaps xmm0,xmm4; shufps xmm0,xmm0,0xff; divps xmm4,xmm0; rcpps xmm0,xmm0; movhlps xmm0,xmm4; shufps xmm0,xmm0,0xe8; movlhps xmm4,xmm0; movaps DWORD PTR g_vecProjected [ecx], xmm4; emms; ret; } } // Only used by DKR __declspec( naked ) void __fastcall SSEVec3TransformDKR(D3DXVECTOR4 &pOut, const D3DXVECTOR4 &pV) { __asm { movaps xmm1, DWORD PTR [edx]; // xmm1 as original vector movaps xmm4, DWORD PTR dkrMatrixTransposed; // row1 movaps xmm5, DWORD PTR dkrMatrixTransposed[0x10]; // row2 movaps xmm6, DWORD PTR dkrMatrixTransposed[0x20]; // row3 movaps xmm7, DWORD PTR dkrMatrixTransposed[0x30]; // row4 mulps xmm4, xmm1; // row 1 mulps xmm5, xmm1; // row 2 mulps xmm6, xmm1; // row 3 mulps xmm7, xmm1; // row 4 movhlps xmm0, xmm4; // xmm4 high to xmm0 low movlhps xmm0, xmm5; // xmm5 low to xmm0 high addps xmm4, xmm0; // result of add are in xmm4 low addps xmm5, xmm0; // result of add are in xmm5 high shufps xmm0, xmm4, 0x44; // move xmm4 low DWORDs to xmm0 high shufps xmm4, 
xmm5, 0xe4; // move xmm5 high DWORS to xmm4 movhlps xmm5, xmm0; // xmm4, xmm5 are mirrored shufps xmm4, xmm4, 0x08; // move xmm4's 3rd DWORD to its 2nd DWORD shufps xmm5, xmm5, 0x0d; // move xmm5's 4th DWORD to its 2nd DWORD, // and move its 2nd DWORD to its 1st DWORD addps xmm4, xmm5; // results are in 1st and 2nd DWORD movhlps xmm0, xmm6; // xmm6 high to xmm0 low movlhps xmm0, xmm7; // xmm7 low to xmm0 high addps xmm6, xmm0; // result of add are in xmm6 low addps xmm7, xmm0; // result of add are in xmm7 high shufps xmm0, xmm6, 0x44; // move xmm6 low DWORDs to xmm0 high shufps xmm6, xmm7, 0xe4; // move xmm7 high DWORS to xmm6 movhlps xmm7, xmm0; // xmm6, xmm7 are mirrored shufps xmm6, xmm6, 0x08; // move xmm6's 3rd DWORD to its 2nd DWORD shufps xmm7, xmm7, 0x0d; // move xmm7's 4th DWORD to its 2nd DWORD, // and move its 2nd DWORD to its 1st DWORD addps xmm6, xmm7; // results are in 1st and 2nd DWORD movlhps xmm4, xmm6; // final result is in xmm4 movaps DWORD PTR [ecx], xmm4; emms; ret; } } #endif // _WIN32 float real255 = 255.0f; #ifdef _WIN32 float real128 = 128.0f; __declspec( naked ) void __fastcall SSEVec3TransformNormal() { __asm { mov DWORD PTR [g_normal][12], 0; movaps xmm4, DWORD PTR gRSPmodelViewTopTranspose; // row1 movaps xmm5, DWORD PTR gRSPmodelViewTopTranspose[0x10]; // row2 movaps xmm1, DWORD PTR [g_normal]; // xmm1 as the normal vector movaps xmm6, DWORD PTR gRSPmodelViewTopTranspose[0x20]; // row3 mulps xmm4, xmm1; // row 1 mulps xmm5, xmm1; // row 2 mulps xmm6, xmm1; // row 3 movhlps xmm0, xmm4; // xmm4 high to xmm0 low movlhps xmm0, xmm5; // xmm5 low to xmm0 high addps xmm4, xmm0; // result of add are in xmm4 low addps xmm5, xmm0; // result of add are in xmm5 high shufps xmm0, xmm4, 0x44; // move xmm4 low DWORDs to xmm0 high shufps xmm4, xmm5, 0xe4; // move xmm5 high DWORS to xmm4 movhlps xmm5, xmm0; // xmm4, xmm5 are mirrored shufps xmm4, xmm4, 0x08; // move xmm4's 3rd DWORD to its 2nd DWORD shufps xmm5, xmm5, 0x0d; // move xmm5's 4th DWORD to 
its 2nd DWORD, addps xmm4, xmm5; // results are in 1st and 2nd DWORD movaps xmm1,xmm4; mulps xmm1,xmm1; //square movlhps xmm7, xmm1; shufps xmm7, xmm7,0x03; addss xmm7, xmm1; movhlps xmm0, xmm6; // xmm6 high to xmm0 low addps xmm6, xmm0; // result of add are in xmm6 low movlhps xmm0, xmm6; shufps xmm0, xmm0, 0x03; addss xmm0, xmm6; // result of add is at xmm0's 1st DWORD movlhps xmm4, xmm0; mulss xmm0,xmm0; addss xmm7,xmm0; // xmm7 1st DWORD is the sum of squares #ifdef _DEBUG movaps DWORD PTR [g_normal], xmm4; movss DWORD PTR [g_normal][12], xmm7; #endif rsqrtss xmm7,xmm7; shufps xmm7,xmm7,0; #ifdef _DEBUG movss DWORD PTR [g_normal][12], xmm7; #endif mulps xmm4,xmm7; movaps DWORD PTR [g_normal], xmm4; // Normalized mov DWORD PTR [g_normal][12], 0; emms; ret; } } #else // _WIN32 extern "C" { void SSEVec3Transform(int i); void SSEVec3TransformNormal(); } #endif // _WIN32 void InitRenderBase() { if( status.isSSEEnabled ) { SetNewVertexInfo = SetNewVertexInfoSSE; } else { SetNewVertexInfo = SetNewVertexInfoNoSSE; } gRSPfFogMin = gRSP.fFogMul = gRSP.fFogOffset = gRSPfFogMax = 0.0f; windowSetting.fMultX = windowSetting.fMultY = 2.0f; windowSetting.vpLeftW = windowSetting.vpTopW = 0; windowSetting.vpRightW = windowSetting.vpWidthW = 640; windowSetting.vpBottomW = windowSetting.vpHeightW = 480; gRSP.maxZ = 0; gRSP.nVPLeftN = gRSP.nVPTopN = 0; gRSP.nVPRightN = 640; gRSP.nVPBottomN = 640; gRSP.nVPWidthN = 640; gRSP.nVPHeightN = 640; gRDP.scissor.left=gRDP.scissor.top=0; gRDP.scissor.right=gRDP.scissor.bottom=640; gRSP.bLightingEnable = gRSP.bTextureGen = false; gRSP.curTile=gRSPnumLights=gRSP.ambientLightColor=gRSP.ambientLightIndex= 0; gRSP.fAmbientLightR=gRSP.fAmbientLightG=gRSP.fAmbientLightB=0; gRSP.projectionMtxTop = gRSP.modelViewMtxTop = 0; gRDP.fogColor = gRDP.primitiveColor = gRDP.envColor = gRDP.primitiveDepth = gRDP.primLODMin = gRDP.primLODFrac = 0; gRDP.fPrimitiveDepth = 0; gRSP.numVertices = 0; gRSP.bCullFront=false; gRSP.bCullBack=true; 
gRSP.bFogEnabled=gRDP.bFogEnableInBlender=false; gRSP.bZBufferEnabled=true; gRSP.shadeMode=SHADE_SMOOTH; gRDP.keyR=gRDP.keyG=gRDP.keyB=gRDP.keyA=gRDP.keyRGB=gRDP.keyRGBA = 0; gRDP.fKeyA = 0; gRSP.DKRCMatrixIndex = gRSP.dwDKRVtxAddr = gRSP.dwDKRMatrixAddr = 0; gRSP.DKRVtxAddBase = false; gRSP.fTexScaleX = 1/32.0f; gRSP.fTexScaleY = 1/32.0f; gRSP.bTextureEnabled = FALSE; gRSP.clip_ratio_left = 0; gRSP.clip_ratio_top = 0; gRSP.clip_ratio_right = 640; gRSP.clip_ratio_bottom = 480; gRSP.clip_ratio_negx = 1; gRSP.clip_ratio_negy = 1; gRSP.clip_ratio_posx = 1; gRSP.clip_ratio_posy = 1; gRSP.real_clip_scissor_left = 0; gRSP.real_clip_scissor_top = 0; gRSP.real_clip_scissor_right = 640; gRSP.real_clip_scissor_bottom = 480; gRSP.real_clip_ratio_negx = 1; gRSP.real_clip_ratio_negy = 1; gRSP.real_clip_ratio_posx = 1; gRSP.real_clip_ratio_posy = 1; gRSP.DKRCMatrixIndex=0; gRSP.DKRVtxCount=0; gRSP.DKRVtxAddBase = false; gRSP.dwDKRVtxAddr=0; gRSP.dwDKRMatrixAddr=0; gRDP.geometryMode = 0; gRDP.otherModeL = 0; gRDP.otherModeH = 0; gRDP.fillColor = 0xFFFFFFFF; gRDP.originalFillColor =0; gRSP.ucode = 1; gRSP.vertexMult = 10; gRSP.bNearClip = false; gRSP.bRejectVtx = false; gRDP.texturesAreReloaded = false; gRDP.textureIsChanged = false; memset(&gRDP.otherMode,0,sizeof(RDP_OtherMode)); memset(&gRDP.tiles,0,sizeof(Tile)*8); for( int i=0; iIsTexel1Enable() ) { DaedalusRenderTexture &tex1 = g_textures[(gRSP.curTile+1)&7]; CTexture *surf = tex1.m_pCTexture; Tile &tile1 = gRDP.tiles[(gRSP.curTile+1)&7]; gRSP.tex1scaleX = scaleX * tile1.fShiftScaleS/tex1.m_fTexWidth; gRSP.tex1scaleY = scaleY * tile1.fShiftScaleT/tex1.m_fTexHeight; gRSP.tex1OffsetX = tile1.hilite_sl/tex1.m_fTexWidth; gRSP.tex1OffsetY = tile1.hilite_tl/tex1.m_fTexHeight; } gRSP.texGenXRatio = tile0.fShiftScaleS; gRSP.texGenYRatio = gRSP.fTexScaleX/gRSP.fTexScaleY*tex0.m_fTexWidth/tex0.m_fTexHeight*tile0.fShiftScaleT; } void InitVertex(TLITVERTEX &v, DWORD dwV, DWORD vtxIndex, bool bTexture, bool openGL) { 
StartProfiler(PROFILE_TRANSFORMATING); if( openGL ) { g_vtxProjected5[vtxIndex][0] = g_vtxTransformed[dwV].x; g_vtxProjected5[vtxIndex][1] = g_vtxTransformed[dwV].y; g_vtxProjected5[vtxIndex][2] = g_vtxTransformed[dwV].z; g_vtxProjected5[vtxIndex][3] = g_vtxTransformed[dwV].w; g_vtxProjected5[vtxIndex][4] = g_fFogCoord[dwV]; g_vtxIndex[vtxIndex] = vtxIndex; } else { v.x = g_vecProjected[dwV].x*gRSP.vtxXMul+gRSP.vtxXAdd; v.y = g_vecProjected[dwV].y*gRSP.vtxYMul+gRSP.vtxYAdd; v.z = (g_vecProjected[dwV].z + 1.0f) * 0.5f; // DirectX minZ=0, maxZ=1 v.rhw = g_vecProjected[dwV].w; if( gRSP.bProcessSpecularColor ) { v.dcSpecular = CDaedalusRender::g_pRender->PostProcessSpecularColor(); if( gRSP.bFogEnabled ) { v.dcSpecular &= 0x00FFFFFF; DWORD fogFct = 0xFF-(BYTE)((g_fFogCoord[dwV]-gRSPfFogMin)*gRSPfFogDivider); v.dcSpecular |= (fogFct<<24); } } else if( gRSP.bFogEnabled ) { DWORD fogFct = 0xFF-(BYTE)((g_fFogCoord[dwV]-gRSPfFogMin)*gRSPfFogDivider); v.dcSpecular = (fogFct<<24); } } #ifdef _DEBUG if( (pauseAtNext && (eventToPause == NEXT_VERTEX_CMD || eventToPause == NEXT_MATRIX_CMD || eventToPause == NEXT_FLUSH_TRI )) && logTriDetails ) { DebuggerAppendMsg(" : %f, %f, %f, %f", v.x,v.y,v.z,v.rhw); } #endif v.dcDiffuse = g_dwVecDiffuseCol[dwV]; if( gRDP.otherMode.key_en ) { v.dcDiffuse &= 0x00FFFFFF; v.dcDiffuse |= (gRDP.keyA<<24); } else if( gRDP.otherMode.aa_en && gRDP.otherMode.clr_on_cvg==0 ) { v.dcDiffuse |= 0xFF000000; } if( gRSP.bProcessDiffuseColor ) { v.dcDiffuse = CDaedalusRender::g_pRender->PostProcessDiffuseColor(v.dcDiffuse); } if( options.gamma_correction ) { v.dcDiffuse = GammaCorrection(v.dcDiffuse); } #ifdef _DEBUG if( options.bWinFrameMode ) { v.dcDiffuse = g_dwVecDiffuseCol[dwV]; } #endif if( openGL ) { g_oglVtxColors[vtxIndex][0] = v.r; g_oglVtxColors[vtxIndex][1] = v.g; g_oglVtxColors[vtxIndex][2] = v.b; g_oglVtxColors[vtxIndex][3] = v.a; } if( bTexture ) { // If the vert is already lit, then there is no normal (and hence we can't generate tex coord) // 
Only scale if not generated automatically if (gRSP.bTextureGen && gRSP.bLightingEnable) { // Correction for texGen result //DaedalusRenderTexture &tex0 = g_textures[gRSP.curTile]; //float y = g_vecTexture[dwV].y/gRSP.fTexScaleY*gRSP.fTexScaleX*tex0.m_fTexWidth/tex0.m_fTexHeight; CDaedalusRender::g_pRender->SetVertexTextureUVCoord(v, g_vecTexture[dwV].x*gRSP.texGenXRatio, g_vecTexture[dwV].y*gRSP.texGenYRatio); } else { float tex0u = g_vecTexture[dwV].x *gRSP.tex0scaleX - gRSP.tex0OffsetX ; float tex0v = g_vecTexture[dwV].y *gRSP.tex0scaleY - gRSP.tex0OffsetY ; if( CDaedalusRender::g_pRender->IsTexel1Enable() ) { float tex1u = g_vecTexture[dwV].x *gRSP.tex1scaleX - gRSP.tex1OffsetX ; float tex1v = g_vecTexture[dwV].y *gRSP.tex1scaleY - gRSP.tex1OffsetY ; CDaedalusRender::g_pRender->SetVertexTextureUVCoord(v, tex0u, tex0v, tex1u, tex1v); } else { CDaedalusRender::g_pRender->SetVertexTextureUVCoord(v, tex0u, tex0v); } } } StopProfiler(PROFILE_TRANSFORMATING); DEBUGGER_ONLY_IF( logTriDetails, { DebuggerAppendMsg("Vertex: %d: (%f, %f, %f, %f), U=%f, V=%f\n DIF(%08X), SPE(%08X)\n", dwV, g_vecProjected[dwV].x, g_vecProjected[dwV].y, g_vecProjected[dwV].z, g_vecProjected[dwV].w, g_vecTexture[dwV].x, g_vecTexture[dwV].y, v.dcDiffuse, v.dcSpecular ); if(bTexture) DebuggerAppendMsg("\tT0(%f,%f), T1(%f,%f)\n", v.tcord[0].u, v.tcord[0].v, v.tcord[1].u, v.tcord[1].v ); } ); } #ifdef USING_INT_MATRIX DWORD LightVertInt(int norms[3]) { int cosT; int r = gRSP.iAmbientLightR; int g = gRSP.iAmbientLightG; int b = gRSP.iAmbientLightB; for (register unsigned int l=0; l < gRSP.numLights; l++) { cosT = norms[0]*gRSP.n64lights[l].x + norms[1]*gRSP.n64lights[l].y + norms[2]*gRSP.n64lights[l].z; if (cosT > 0) { r += gRSPlights[l].r * cosT / 256; g += gRSPlights[l].g * cosT / 256; b += gRSPlights[l].b * cosT / 256; } } if (r > 255) r = 255; if (g > 255) g = 255; if (b > 255) b = 255; return ((0xff000000)|(((DWORD)r)<<16)|(((DWORD)g)<<8)|((DWORD)b)); } #endif DWORD LightVert(D3DXVECTOR4 & 
norm) { float fCosT; // Do ambient register float r = gRSP.fAmbientLightR; register float g = gRSP.fAmbientLightG; register float b = gRSP.fAmbientLightB; for (register unsigned int l=0; l < gRSPnumLights; l++) { fCosT = norm.x*gRSPlights[l].x + norm.y*gRSPlights[l].y + norm.z*gRSPlights[l].z; if (fCosT > 0) { r += gRSPlights[l].fr * fCosT; g += gRSPlights[l].fg * fCosT; b += gRSPlights[l].fb * fCosT; } } if (r > 255) r = 255; if (g > 255) g = 255; if (b > 255) b = 255; return ((0xff000000)|(((DWORD)r)<<16)|(((DWORD)g)<<8)|((DWORD)b)); } float zero = 0.0f; #ifdef _WIN32 float fcosT; __m128 cosT128; __m64 icolor64; __m128 icolor128; __declspec( naked ) DWORD __fastcall SSELightVert() { /* register __m128 color128 = _mm_set_ps( gRSP.fAmbientLightR, gRSP.fAmbientLightG, gRSP.fAmbientLightB, 0 ); register __m128 normals = _mm_set_ps(norm.x , norm.y , norm.z , 0 ); for( register unsigned int l=0; l < gRSP.numLights; l++ ) { register __m128 lightdir = _mm_set_ps(gRSPlights[l].x, gRSPlights[l].y, gRSPlights[l].z, 0); cosT128 = _mm_mul_ps(lightdir,normals); fcosT = cosT128.m128_f32[3]+cosT128.m128_f32[2]+cosT128.m128_f32[1]; if (fcosT > 0) { cosT128 = _mm_set_ps1(fcosT); register __m128 lightcolor = _mm_set_ps(gRSPlights[l].fr, gRSPlights[l].fg, gRSPlights[l].fb, 0); lightcolor = _mm_mul_ps(lightcolor,cosT128); color128 = _mm_add_ps(color128,lightcolor); } } color128 = _mm_min_ps(color128,_mm_set_ps1(255.0f)); color64 = _mm_cvtps_pi16(color128); register DWORD finalcolor = ((0xff000000)|(color64.m64_u16[3]<<16)|(color64.m64_u16[2]<<8)|color64.m64_u16[1]); _mm_empty(); //return finalcolor; */ __asm { movaps xmm3, DWORD PTR gRSP; // loading Ambient colors, xmm3 is the result color movaps xmm4, DWORD PTR [g_normal]; // xmm4 is the normal mov ecx, 0; loopback: cmp ecx, DWORD PTR gRSPnumLights; jae breakout; mov eax,ecx; imul eax,0x28; movups xmm5, DWORD PTR gRSPlights[eax]; // Light Dir movups xmm1, DWORD PTR gRSPlights[0x18][eax]; // Light color mulps xmm5, xmm4; // Lightdir 
* normals movlhps xmm0,xmm5; addps xmm0,xmm5; shufps xmm5,xmm0,0xf0; // xmm5 3rd float = xmm0 4th float addps xmm0,xmm5; shufps xmm0,xmm0,0x02; // xmm0 1st float = xmm0 3rd float comiss xmm0,zero; jc endloop shufps xmm0,xmm0,0; // fcosT mulps xmm1,xmm0; addps xmm3,xmm1; endloop: inc ecx; jmp loopback; breakout: movss xmm0,DWORD PTR real255; shufps xmm0,xmm0,0; minps xmm0,xmm3; // Without using a memory cvtss2si eax,xmm0; // move the 1st DWORD to eax shl eax,10h; or eax,0FF000000h; shufps xmm0,xmm0,0E5h; // move the 2nd DWORD to the 1st DWORD cvtss2si ecx,xmm0; // move the 1st DWORD to ecx shl ecx,8; or eax,ecx; shufps xmm0,xmm0,0E6h; // Move the 3rd DWORD to the 1st DWORD cvtss2si ecx,xmm0; or eax,ecx; ret; } } #else // _WIN32 extern "C" { DWORD SSELightVert(); } #endif // _WIN32 // Bits // +-+-+- // xxyyzz #define Z_NEG 0x01 #define Z_POS 0x02 #define Y_NEG 0x04 #define Y_POS 0x08 #define X_NEG 0x10 #define X_POS 0x20 // Assumes dwAddress has already been checked! // Don't inline - it's too big with the transform macros void SetNewVertexInfoSSE(DWORD dwAddress, DWORD dwV0, DWORD dwNum) { // This function is called upon SPvertex // - do vertex matrix transform // - do vertex lighting // - do texture cooridinate transform if needed // - calculate normal vector // Output: - g_vecProjected[i] -> transformed vertex x,y,z // - g_vecProjected[i].w -> saved vertex 1/w // - g_dwVecFlags[i] -> flags // - g_dwVecDiffuseCol[i] -> vertex color // - g_vecTexture[i] -> vertex texture cooridinates StartProfiler(PROFILE_TRANSFORMATING); FiddledVtx * pVtxBase = (FiddledVtx*)(g_pu8RamBase + dwAddress); g_pVtxBase = pVtxBase; DWORD i; for (i = dwV0; i < dwV0 + dwNum; i++) { SP_Timing(DLParser_GBI0_Vtx); FiddledVtx & vert = pVtxBase[i - dwV0]; g_vtxNonTransformed[i].x = (float)vert.x; g_vtxNonTransformed[i].y = (float)vert.y; g_vtxNonTransformed[i].z = (float)vert.z; SSEVec3Transform(i); if( gRSP.bFogEnabled ) { /* if( g_vecProjected[i].w < 0 || g_vecProjected[i].z < gRSPfFogMin ) 
g_fFogCoord[i] = gRSPfFogMin; else g_fFogCoord[i] = g_vecProjected[i].z; *(((BYTE*)&(g_dwVecDiffuseCol[i]))+3) = (BYTE)(g_fFogCoord[i]*255); */ #ifdef _WIN32 __asm { mov eax, i; shl eax, 2 movss xmm0, dword ptr g_vecProjected[eax*4][8]; movss xmm1, dword ptr g_vecProjected[eax*4][12]; comiss xmm1,zero; jc step2; comiss xmm0, dword ptr gRSPfFogMin; jc step2; jmp step3; step2: movss xmm0, dword ptr gRSPfFogMin; step3: movss dword ptr g_fFogCoord[eax], xmm0; mulss xmm0, real255; cvtss2si ecx,xmm0; // move the 1st DWORD to ecx mov byte ptr g_dwVecDiffuseCol[eax][3], cl; } #else // _WIN32 asm("shl $2, %%eax \n" "movss g_vecProjected+8(,%%eax,4), %%xmm0 \n" "movss g_vecProjected+12(,%%eax,4),%%xmm1 \n" "comiss zero, %%xmm1 \n" "jc step2 \n" "comiss gRSPfFogMin, %%xmm0 \n" "jc step2 \n" "jmp step3 \n" "step2: \n" "movss gRSPfFogMin, %%xmm0 \n" "step3: \n" "movss %%xmm0, g_fFogCoord(,%%eax) \n" "mulss real255, %%xmm0 \n" "cvtss2si %%xmm0, %%ecx \n" "mov %%cl, g_dwVecDiffuseCol+3(,%%eax) \n" : : "a" (i) : "cc", "memory", "%xmm0", "%xmm1", "%ecx"); #endif // _WIN32 } #ifdef _DEBUG if( (pauseAtNext && (eventToPause == NEXT_VERTEX_CMD || eventToPause == NEXT_MATRIX_CMD || eventToPause == NEXT_FLUSH_TRI )) && logTriDetailsWithVertexMtx ) { DWORD *dat = (DWORD*)(&vert); DebuggerAppendMsg("vtx %d: %08X %08X %08X %08X", i, dat[0],dat[1],dat[2],dat[3]); DebuggerAppendMsg(" : %f, %f, %f, %f", g_vtxTransformed[i].x,g_vtxTransformed[i].y,g_vtxTransformed[i].z,g_vtxTransformed[i].w); DebuggerAppendMsg(" : %f, %f, %f, %f", g_vecProjected[i].x,g_vecProjected[i].y,g_vecProjected[i].z,g_vecProjected[i].w); } #endif RSP_Vtx_Clipping(i); if( gRSP.bLightingEnable ) { g_normal.x = (float)vert.norma.nx; g_normal.y = (float)vert.norma.ny; g_normal.z = (float)vert.norma.nz; SSEVec3TransformNormal(); g_dwVecDiffuseCol[i] = SSELightVert(); *(((BYTE*)&(g_dwVecDiffuseCol[i]))+3) = vert.rgba.a; // still use alpha from the vertex } else { if( (gRDP.geometryMode & G_SHADE) == 0 && gRSP.ucode < 5 ) 
//Shade is disabled { //FLAT shade g_dwVecDiffuseCol[i] = gRDP.primitiveColor; } else { register IColor &color = *(IColor*)&g_dwVecDiffuseCol[i]; color.b = vert.rgba.r; color.g = vert.rgba.g; color.r = vert.rgba.b; color.a = vert.rgba.a; } } #ifdef _DEBUG if( options.bWinFrameMode ) { g_dwVecDiffuseCol[i] = DAEDALUS_COLOR_RGBA(vert.rgba.r, vert.rgba.g, vert.rgba.b, vert.rgba.a); } #endif // Update texture coords n.b. need to divide tu/tv by bogus scale on addition to buffer // If the vert is already lit, then there is no normal (and hence we // can't generate tex coord) if (gRSP.bTextureGen && gRSP.bLightingEnable ) { DaedalusMatrix & matWV = gRSP.modelviewMtxs[gRSP.modelViewMtxTop]; // Assign the spheremap's texture coordinates g_vecTexture[i].x = (0.5f * ( 1.0f + ( g_normal.x*matWV._11 + g_normal.y*matWV._21 + g_normal.z*matWV._31 ) )); g_vecTexture[i].y = (0.5f * ( 1.0f - ( g_normal.x*matWV._12 + g_normal.y*matWV._22 + g_normal.z*matWV._32 ) )); } else { g_vecTexture[i].x = (float)vert.tu; g_vecTexture[i].y = (float)vert.tv; } } StopProfiler(PROFILE_TRANSFORMATING); #ifdef _DEBUG if( (pauseAtNext && (eventToPause == NEXT_TRIANGLE || eventToPause == NEXT_FLUSH_TRI )) && logTriDetailsWithVertexMtx ) { DebuggerAppendMsg("Setting Vertexes: %d - %d\n", dwV0, dwV0+dwNum-1); } DEBUGGER_PAUSE_AND_DUMP(NEXT_VERTEX_CMD,{TRACE0("Paused at Vertex Cmd");}); #endif } void SetNewVertexInfoNoSSE(DWORD dwAddress, DWORD dwV0, DWORD dwNum) { // This function is called upon SPvertex // - do vertex matrix transform // - do vertex lighting // - do texture cooridinate transform if needed // - calculate normal vector // Output: - g_vecProjected[i] -> transformed vertex x,y,z // - g_vecProjected[i].w -> saved vertex 1/w // - g_dwVecFlags[i] -> flags // - g_dwVecDiffuseCol[i] -> vertex color // - g_vecTexture[i] -> vertex texture cooridinates StartProfiler(PROFILE_TRANSFORMATING); FiddledVtx * pVtxBase = (FiddledVtx*)(g_pu8RamBase + dwAddress); g_pVtxBase = pVtxBase; DWORD i; for (i = 
dwV0; i < dwV0 + dwNum; i++) { SP_Timing(DLParser_GBI0_Vtx); FiddledVtx & vert = pVtxBase[i - dwV0]; #ifdef USING_INT_MATRIX IntVec3Transform(g_vtxTransformed[i], vert, gRSPworldProjectInt); #else g_vtxNonTransformed[i].x = (float)vert.x; g_vtxNonTransformed[i].y = (float)vert.y; g_vtxNonTransformed[i].z = (float)vert.z; D3DXVec3Transform(&g_vtxTransformed[i], (D3DXVECTOR3*)&g_vtxNonTransformed[i], &gRSPworldProject); // Convert to w=1 #endif g_vecProjected[i].w = 1.0f / g_vtxTransformed[i].w; g_vecProjected[i].x = g_vtxTransformed[i].x * g_vecProjected[i].w; g_vecProjected[i].y = g_vtxTransformed[i].y * g_vecProjected[i].w; if( g_curRomInfo.bPrimaryDepthHack && gRDP.otherMode.depth_source ) { g_vecProjected[i].z = gRDP.fPrimitiveDepth; g_vtxTransformed[i].z = gRDP.fPrimitiveDepth*g_vtxTransformed[i].w; } else { g_vecProjected[i].z = g_vtxTransformed[i].z * g_vecProjected[i].w; } if( gRSP.bFogEnabled ) { g_fFogCoord[i] = g_vecProjected[i].z; if( g_vecProjected[i].w < 0 || g_vecProjected[i].z < 0 || g_fFogCoord[i] < gRSPfFogMin ) g_fFogCoord[i] = gRSPfFogMin; } #ifdef _DEBUG if( (pauseAtNext && (eventToPause == NEXT_VERTEX_CMD || eventToPause == NEXT_MATRIX_CMD || eventToPause == NEXT_FLUSH_TRI )) && logTriDetailsWithVertexMtx ) { DWORD *dat = (DWORD*)(&vert); DebuggerAppendMsg("vtx %d: %08X %08X %08X %08X", i, dat[0],dat[1],dat[2],dat[3]); DebuggerAppendMsg(" : %f, %f, %f, %f", g_vtxTransformed[i].x,g_vtxTransformed[i].y,g_vtxTransformed[i].z,g_vtxTransformed[i].w); DebuggerAppendMsg(" : %f, %f, %f, %f", g_vecProjected[i].x,g_vecProjected[i].y,g_vecProjected[i].z,g_vecProjected[i].w); } #endif RSP_Vtx_Clipping(i); if( gRSP.bLightingEnable ) { #ifdef USING_INT_MATRIX IntVec3TransformNormal(norms,vert.norma,gRSP.modelViewIntTop); extern DWORD LightVertInt(int norms[3]); g_dwVecDiffuseCol[i] = LightVertInt(norms); #else g_normal.x = (float)vert.norma.nx; g_normal.y = (float)vert.norma.ny; g_normal.z = (float)vert.norma.nz; Vec3TransformNormal(g_normal, 
gRSPmodelViewTop); g_dwVecDiffuseCol[i] = LightVert(g_normal); #endif *(((BYTE*)&(g_dwVecDiffuseCol[i]))+3) = vert.rgba.a; // still use alpha from the vertex } else { if( (gRDP.geometryMode & G_SHADE) == 0 && gRSP.ucode < 5 ) //Shade is disabled { //FLAT shade g_dwVecDiffuseCol[i] = gRDP.primitiveColor; } else { register IColor &color = *(IColor*)&g_dwVecDiffuseCol[i]; color.b = vert.rgba.r; color.g = vert.rgba.g; color.r = vert.rgba.b; color.a = vert.rgba.a; } } #ifdef _DEBUG if( options.bWinFrameMode ) { g_dwVecDiffuseCol[i] = DAEDALUS_COLOR_RGBA(vert.rgba.r, vert.rgba.g, vert.rgba.b, vert.rgba.a); } #endif //if( gRDP.geometryMode & G_FOG ) if( gRSP.bFogEnabled ) { // Use fog factor to replace vertex alpha *(((BYTE*)&(g_dwVecDiffuseCol[i]))+3) = (BYTE)(g_fFogCoord[i]*255); } // Update texture coords n.b. need to divide tu/tv by bogus scale on addition to buffer // If the vert is already lit, then there is no normal (and hence we // can't generate tex coord) if (gRSP.bTextureGen && gRSP.bLightingEnable ) { DaedalusMatrix & matWV = gRSP.modelviewMtxs[gRSP.modelViewMtxTop]; // Assign the spheremap's texture coordinates g_vecTexture[i].x = (0.5f * ( 1.0f + ( g_normal.x*matWV._11 + g_normal.y*matWV._21 + g_normal.z*matWV._31 ) )); g_vecTexture[i].y = (0.5f * ( 1.0f - ( g_normal.x*matWV._12 + g_normal.y*matWV._22 + g_normal.z*matWV._32 ) )); } else { g_vecTexture[i].x = (float)vert.tu; g_vecTexture[i].y = (float)vert.tv; } } StopProfiler(PROFILE_TRANSFORMATING); #ifdef _DEBUG if( (pauseAtNext && (eventToPause == NEXT_TRIANGLE || eventToPause == NEXT_FLUSH_TRI )) && logTriDetailsWithVertexMtx ) { DebuggerAppendMsg("Setting Vertexes: %d - %d\n", dwV0, dwV0+dwNum-1); } DEBUGGER_PAUSE_AND_DUMP(NEXT_VERTEX_CMD,{TRACE0("Paused at Vertex Cmd");}); #endif } void AddTri(DWORD dwV0, DWORD dwV1, DWORD dwV2) { #ifdef ENABLE_CLIP_TRI //if( gRSP.bRejectVtx && (g_clipFlag[dwV0]|g_clipFlag[dwV1]|g_clipFlag[dwV2]) ) // return; if( g_clipFlag2[dwV0]&g_clipFlag2[dwV1]&g_clipFlag2[dwV2] ) 
return; #endif SP_Timing(SP_Each_Triangle); bool textureFlag = (CDaedalusRender::g_pRender->IsTextureEnabled() || gRSP.ucode == 6 ); bool openGL = CDeviceBuilder::m_deviceGeneralType == OGL_DEVICE; InitVertex(g_ucVertexBuffer[gRSP.numVertices + 0], dwV0, gRSP.numVertices, textureFlag, openGL); InitVertex(g_ucVertexBuffer[gRSP.numVertices + 1], dwV1, gRSP.numVertices+1, textureFlag, openGL); InitVertex(g_ucVertexBuffer[gRSP.numVertices + 2], dwV2, gRSP.numVertices+2, textureFlag, openGL); /* if (gRDP.otherMode.zmode == 3 )//|| CDaedalusRender::g_pRender->m_dwZBias > 0 ) { // Bias points by normal g_vtxProjected5[gRSP.numVertices+0][2] -= 0.001f*g_vtxTransformed[dwV0].w; g_vtxProjected5[gRSP.numVertices+1][2] -= 0.001f*g_vtxTransformed[dwV1].w; g_vtxProjected5[gRSP.numVertices+2][2] -= 0.001f*g_vtxTransformed[dwV2].w; g_ucVertexBuffer[gRSP.numVertices + 0].z -= 0.001f; g_ucVertexBuffer[gRSP.numVertices + 1].z -= 0.001f; g_ucVertexBuffer[gRSP.numVertices + 2].z -= 0.001f; } */ gRSP.numVertices += 3; status.dwNumTrisRendered++; } // Returns TRUE if it thinks the triangle is visible // Returns FALSE if it is clipped bool TestTri(DWORD dwV0, DWORD dwV1, DWORD dwV2) { //return true; //fix me DEBUGGER_ONLY_IF( (!debuggerEnableTestTris || !debuggerEnableCullFace), {return TRUE;}); #ifdef _DEBUG // Check vertices are valid! if (dwV0 >= MAX_VERTS || dwV1 >= MAX_VERTS || dwV2 >= MAX_VERTS) return false; #endif // Here we AND all the flags. If any of the bits is set for all // 3 vertices, it means that all three x, y or z lie outside of // the current viewing volume. // Currently disabled - still seems a bit dodgy if ((gRSP.bCullFront || gRSP.bCullBack) && gRDP.otherMode.zmode != 3) { D3DXVECTOR4 & v0 = g_vecProjected[dwV0]; D3DXVECTOR4 & v1 = g_vecProjected[dwV1]; D3DXVECTOR4 & v2 = g_vecProjected[dwV2]; // Only try to clip if the tri is onscreen. 
For some reason, this // method doesnt' work well when the z value is outside of screenspace if (v0.z < 1 && v1.z < 1 && v2.z < 1) { float V1 = v2.x - v0.x; float V2 = v2.y - v0.y; float W1 = v2.x - v1.x; float W2 = v2.y - v1.y; float fDirection = (V1 * W2) - (V2 * W1); //float fDirection = v0.x*v1.y-v1.x*v0.y+v1.x*v2.y-v2.x*v1.y+v2.x*v0.y-v0.x*v2.y; /* */ if (fDirection < 0 && gRSP.bCullBack) { status.dwNumTrisClipped++; return false; } else if (fDirection > 0 && gRSP.bCullFront) { status.dwNumTrisClipped++; return false; } } } return true; } void SetPrimitiveColor(DWORD dwCol, DWORD LODMin, DWORD LODFrac) { //dwCol = GammaCorrection(dwCol); gRDP.primitiveColor = dwCol; gRDP.primLODMin = LODMin; gRDP.primLODFrac = LODFrac; if( gRDP.primLODFrac < gRDP.primLODMin ) { gRDP.primLODFrac = gRDP.primLODMin; } gRDP.fvPrimitiveColor[0] = ((dwCol>>16)&0xFF)/255.0f; //r gRDP.fvPrimitiveColor[1] = ((dwCol>>8)&0xFF)/255.0f; //g gRDP.fvPrimitiveColor[2] = ((dwCol)&0xFF)/255.0f; //b gRDP.fvPrimitiveColor[3] = ((dwCol>>24)&0xFF)/255.0f; //a } void SetPrimitiveDepth(DWORD z, DWORD dwDZ) { gRDP.primitiveDepth = z&0x7FFF; //if( gRDP.primitiveDepth != 0 && gRDP.primitiveDepth != 0xFFFF ) { gRDP.fPrimitiveDepth = (float)(gRDP.primitiveDepth)/(float)0x8000; //gRDP.fPrimitiveDepth = gRDP.fPrimitiveDepth*2-1; /* z=0xFFFF -> 1 the farest z=0 -> -1 the nearest */ } //how to use dwDZ? 
#ifdef _DEBUG if( (pauseAtNext && (eventToPause == NEXT_VERTEX_CMD || eventToPause == NEXT_FLUSH_TRI )) )//&& logTriDetails ) { DebuggerAppendMsg("Set prim Depth: %f, (%08X, %08X)", gRDP.fPrimitiveDepth, z, dwDZ); } #endif } void SetVertexXYZ(DWORD vertex, float x, float y, float z) { g_vecProjected[vertex].x = x; g_vecProjected[vertex].y = y; g_vecProjected[vertex].z = z; g_vecProjected[vertex].w = 1/z; } extern DWORD dwTvSystem; void ModifyVertexInfo(DWORD where, DWORD vertex, DWORD val) { switch (where) { case G_MWO_POINT_RGBA: // Modify RGBA { DWORD r = (val>>24)&0xFF; DWORD g = (val>>16)&0xFF; DWORD b = (val>>8)&0xFF; DWORD a = val&0xFF; g_dwVecDiffuseCol[vertex] = DAEDALUS_COLOR_RGBA(r, g, b, a); DL_PF("Modify vert %d color, 0x%08x", vertex, g_dwVecDiffuseCol[vertex]); } break; case G_MWO_POINT_XYSCREEN: // Modify X,Y { u16 nX = (u16)(val>>16); s16 x = *((s16*)&nX); x /= 4; u16 nY = u16(val&0xFFFF); s16 y = *((s16*)&nY); y /= 4; // Should do viewport transform. x -= windowSetting.uViWidth/2; y = windowSetting.uViHeight/2-y; if( options.enableHacks && ((*g_GraphicsInfo.VI_X_SCALE_REG)&0xF) != 0 ) { // Tarzan // I don't know why Tarzan is different SetVertexXYZ(vertex, x/windowSetting.fViWidth, y/windowSetting.fViHeight, g_vecProjected[vertex].z); } else { // Toy Story 2 and other games SetVertexXYZ(vertex, x*2/windowSetting.fViWidth, y*2/windowSetting.fViHeight, g_vecProjected[vertex].z); } DL_PF("Modify vert %d: x=%d, y=%d", vertex, x, y); DEBUGGER_IF_DUMP( (logTriDetailsWithVertexMtx||(pauseAtNext && eventToPause==NEXT_VERTEX_CMD)), {DebuggerAppendMsg("Modify vert %d: (%d,%d)", vertex, x, y);}); } break; case G_MWO_POINT_ZSCREEN: // Modify C { int z = val>>16; SetVertexXYZ(vertex, g_vecProjected[vertex].x, g_vecProjected[vertex].y, (((float)z/0x03FF)+0.5f)/2.0f ); DL_PF("Modify vert %d: z=%d", vertex, z); DEBUGGER_IF_DUMP( (logTriDetailsWithVertexMtx||(pauseAtNext && eventToPause==NEXT_VERTEX_CMD)), {DebuggerAppendMsg("Modify vert %d: z=%d", vertex, z);}); } 
break; case G_MWO_POINT_ST: // Texture { short tu = short(val>>16); short tv = short(val & 0xFFFF); float ftu = tu / 32.0f; float ftv = tv / 32.0f; DL_PF(" Setting vertex %d tu/tv to %f, %f", vertex, (float)tu, (float)tv); CDaedalusRender::g_pRender->SetVtxTextureCoord(vertex, ftu/gRSP.fTexScaleX, ftv/gRSP.fTexScaleY); } break; } DEBUGGER_PAUSE_AND_DUMP(NEXT_VERTEX_CMD,{TRACE0("Paused at ModVertex Cmd");}); } void SetNewVertexInfoDKR(DWORD dwAddress, DWORD dwV0, DWORD dwNum) { uint32 pVtxBase = (uint32)(g_pu8RamBase + dwAddress); g_pVtxBase = (FiddledVtx*)pVtxBase; DaedalusMatrix &matWorldProject = gRSP.DKRMatrixes[gRSP.DKRCMatrixIndex]; DWORD i; LONG nOff; bool addbase=false; if ((!gRSP.DKRVtxAddBase) || (gRSP.DKRCMatrixIndex != 2) ) addbase = false; else addbase = true; if( addbase && gRSP.DKRVtxCount == 0 && dwNum > 1 ) { gRSP.DKRVtxCount++; } DL_PF(" SetNewVertexInfoDKR, CMatrix = %d, Add base=%s", gRSP.DKRCMatrixIndex, gRSP.DKRVtxAddBase?"true":"false"); #ifdef _DEBUG if( (pauseAtNext && (eventToPause == NEXT_TRIANGLE || eventToPause == NEXT_MATRIX_CMD || eventToPause == NEXT_FLUSH_TRI )) && logTriDetailsWithVertexMtx ) { TRACE0("DKR Setting Vertexes\n"); DebuggerAppendMsg("CMatrix = %d, Add base=%s", gRSP.DKRCMatrixIndex, gRSP.DKRVtxAddBase?"true":"false"); } #endif nOff = 0; DWORD end = dwV0 + dwNum; for (i = dwV0; i < end; i++) { D3DXVECTOR3 w; g_vtxNonTransformed[i].x = (float)*(s16*)((pVtxBase+nOff + 0) ^ 2); g_vtxNonTransformed[i].y = (float)*(s16*)((pVtxBase+nOff + 2) ^ 2); g_vtxNonTransformed[i].z = (float)*(s16*)((pVtxBase+nOff + 4) ^ 2); //if( status.isSSEEnabled ) // SSEVec3TransformDKR(g_vtxTransformed[i], g_vtxNonTransformed[i]); //else D3DXVec3Transform(&g_vtxTransformed[i], (D3DXVECTOR3*)&g_vtxNonTransformed[i], &matWorldProject); // Convert to w=1 if( gRSP.DKRVtxCount == 0 && dwNum==1 ) { gRSP.DKRBaseVec.x = g_vtxTransformed[i].x; gRSP.DKRBaseVec.y = g_vtxTransformed[i].y; gRSP.DKRBaseVec.z = g_vtxTransformed[i].z; gRSP.DKRBaseVec.w = 
g_vtxTransformed[i].w; } else if( addbase ) { g_vtxTransformed[i].x += gRSP.DKRBaseVec.x; g_vtxTransformed[i].y += gRSP.DKRBaseVec.y; g_vtxTransformed[i].z += gRSP.DKRBaseVec.z; g_vtxTransformed[i].w = gRSP.DKRBaseVec.w; } g_vecProjected[i].w = 1.0f / g_vtxTransformed[i].w; g_vecProjected[i].x = g_vtxTransformed[i].x * g_vecProjected[i].w; g_vecProjected[i].y = g_vtxTransformed[i].y * g_vecProjected[i].w; g_vecProjected[i].z = g_vtxTransformed[i].z * g_vecProjected[i].w; gRSP.DKRVtxCount++; #ifdef _DEBUG if( (pauseAtNext && (eventToPause == NEXT_TRIANGLE || eventToPause == NEXT_MATRIX_CMD || eventToPause == NEXT_FLUSH_TRI )) && logTriDetailsWithVertexMtx ) { DebuggerAppendMsg("vtx %d: %f, %f, %f, %f", i, g_vtxTransformed[i].x,g_vtxTransformed[i].y,g_vtxTransformed[i].z,g_vtxTransformed[i].w); } #endif if( gRSP.bFogEnabled ) { g_fFogCoord[i] = g_vecProjected[i].z; if( g_vecProjected[i].w < 0 || g_vecProjected[i].z < 0 || g_fFogCoord[i] < gRSPfFogMin ) g_fFogCoord[i] = gRSPfFogMin; } RSP_Vtx_Clipping(i); s16 wA = *(s16*)((pVtxBase+nOff + 6) ^ 2); s16 wB = *(s16*)((pVtxBase+nOff + 8) ^ 2); s8 r = (s8)(wA >> 8); s8 g = (s8)(wA); s8 b = (s8)(wB >> 8); s8 a = (s8)(wB); if (gRSP.bLightingEnable) { g_normal.x = (float)r; //norma.nx; g_normal.y = (float)g; //norma.ny; g_normal.z = (float)b; //norma.nz; Vec3TransformNormal(g_normal, matWorldProject) if( status.isSSESupported ) g_dwVecDiffuseCol[i] = SSELightVert(); else g_dwVecDiffuseCol[i] = LightVert(g_normal); } else { LONG nR, nG, nB, nA; nR = r; nG = g; nB = b; nA = a; // Assign true vert colour after lighting/fogging g_dwVecDiffuseCol[i] = DAEDALUS_COLOR_RGBA(nR, nG, nB, nA); } if( gRDP.geometryMode & G_FOG ) { // Use fog factor to replace vertex alpha *(((BYTE*)&(g_dwVecDiffuseCol[i]))+3) = (BYTE)(g_fFogCoord[i]*255); } g_vecTexture[i].x = g_vecTexture[i].y = 1; nOff += 10; } DEBUGGER_PAUSE_AND_DUMP(NEXT_VERTEX_CMD,{DebuggerAppendMsg("Paused at DKR Vertex Cmd, v0=%d, vn=%d, addr=%08X", dwV0, dwNum, dwAddress);}); } 
extern DWORD dwPDCIAddr; void SetNewVertexInfoPD(DWORD dwAddress, DWORD dwV0, DWORD dwNum) { StartProfiler(PROFILE_TRANSFORMATING); N64VtxPD * pVtxBase = (N64VtxPD*)(g_pu8RamBase + dwAddress); g_pVtxBase = (FiddledVtx*)pVtxBase; // Fix me for (u32 i = dwV0; i < dwV0 + dwNum; i++) { N64VtxPD &vert = pVtxBase[i - dwV0]; g_vtxNonTransformed[i].x = (float)vert.x; g_vtxNonTransformed[i].y = (float)vert.y; g_vtxNonTransformed[i].z = (float)vert.z; if( status.isSSEEnabled ) SSEVec3Transform(i); else { D3DXVec3Transform(&g_vtxTransformed[i], (D3DXVECTOR3*)&g_vtxNonTransformed[i], &gRSPworldProject); // Convert to w=1 g_vecProjected[i].w = 1.0f / g_vtxTransformed[i].w; g_vecProjected[i].x = g_vtxTransformed[i].x * g_vecProjected[i].w; g_vecProjected[i].y = g_vtxTransformed[i].y * g_vecProjected[i].w; g_vecProjected[i].z = g_vtxTransformed[i].z * g_vecProjected[i].w; } g_fFogCoord[i] = g_vecProjected[i].z; if( g_vecProjected[i].w < 0 || g_vecProjected[i].z < 0 || g_fFogCoord[i] < gRSPfFogMin ) g_fFogCoord[i] = gRSPfFogMin; RSP_Vtx_Clipping(i); BYTE *addr = g_pu8RamBase+dwPDCIAddr+vert.cidx; DWORD a = addr[0]; DWORD r = addr[1]; DWORD g = addr[2]; DWORD b = addr[3]; if( gRSP.bLightingEnable ) { g_normal.x = (float)r; g_normal.y = (float)g; g_normal.z = (float)a; if( status.isSSESupported ) { SSEVec3TransformNormal(); g_dwVecDiffuseCol[i] = SSELightVert(); } else { Vec3TransformNormal(g_normal, gRSPmodelViewTop); g_dwVecDiffuseCol[i] = LightVert(g_normal); } *(((BYTE*)&(g_dwVecDiffuseCol[i]))+3) = (BYTE)a; // still use alpha from the vertex } else { if( (gRDP.geometryMode & G_SHADE) == 0 && gRSP.ucode < 5 ) //Shade is disabled { g_dwVecDiffuseCol[i] = gRDP.primitiveColor; } else //FLAT shade { g_dwVecDiffuseCol[i] = DAEDALUS_COLOR_RGBA(r, g, b, a); } } #ifdef _DEBUG if( options.bWinFrameMode ) { g_dwVecDiffuseCol[i] = DAEDALUS_COLOR_RGBA(r, g, b, a); } #endif if( gRDP.geometryMode & G_FOG ) { // Use fog factor to replace vertex alpha *(((BYTE*)&(g_dwVecDiffuseCol[i]))+3) = 
(BYTE)(g_fFogCoord[i]*255); } VECTOR2 & t = g_vecTexture[i]; /* if (gRSP.bTextureGen && gRSP.bLightingEnable && g_textures[gRSP.curTile].m_bTextureEnable ) { DaedalusMatrix & matWV = gRSP.modelviewMtxs[gRSP.modelViewMtxTop]; // Assign the spheremap's texture coordinates t.x = (0.5f * ( 1.0f + ( g_normal.x*matWV._11 + g_normal.y*matWV._21 + g_normal.z*matWV._31 ) )); t.y = (0.5f * ( 1.0f - ( g_normal.x*matWV._12 + g_normal.y*matWV._22 + g_normal.z*matWV._32 ) )); } else */ { t.x = vert.s; t.y = vert.t; } } StopProfiler(PROFILE_TRANSFORMATING); #ifdef _DEBUG if( (pauseAtNext && (eventToPause == NEXT_TRIANGLE || eventToPause == NEXT_FLUSH_TRI )) && logTriDetailsWithVertexMtx ) { DebuggerAppendMsg("Setting Vertexes: %d - %d\n", dwV0, dwV0+dwNum-1); } DEBUGGER_PAUSE_AND_DUMP(NEXT_VERTEX_CMD,{TRACE0("Paused at Vertex Cmd");}); #endif } extern DWORD dwConkerVtxZAddr; void SetNewVertexInfoConker(DWORD dwAddress, DWORD dwV0, DWORD dwNum) { StartProfiler(PROFILE_TRANSFORMATING); FiddledVtx * pVtxBase = (FiddledVtx*)(g_pu8RamBase + dwAddress); g_pVtxBase = pVtxBase; s16 *vertexColoraddr = (s16*)(g_pu8RamBase+dwConkerVtxZAddr); DWORD i; for (i = dwV0; i < dwV0 + dwNum; i++) { SP_Timing(DLParser_GBI0_Vtx); FiddledVtx & vert = pVtxBase[i - dwV0]; g_vtxNonTransformed[i].x = (float)vert.x; g_vtxNonTransformed[i].y = (float)vert.y; g_vtxNonTransformed[i].z = (float)vert.z; if( status.isSSEEnabled ) SSEVec3Transform(i); else { D3DXVec3Transform(&g_vtxTransformed[i], (D3DXVECTOR3*)&g_vtxNonTransformed[i], &gRSPworldProject); // Convert to w=1 g_vecProjected[i].w = 1.0f / g_vtxTransformed[i].w; g_vecProjected[i].x = g_vtxTransformed[i].x * g_vecProjected[i].w; g_vecProjected[i].y = g_vtxTransformed[i].y * g_vecProjected[i].w; g_vecProjected[i].z = g_vtxTransformed[i].z * g_vecProjected[i].w; } g_fFogCoord[i] = g_vecProjected[i].z; if( g_vecProjected[i].w < 0 || g_vecProjected[i].z < 0 || g_fFogCoord[i] < gRSPfFogMin ) g_fFogCoord[i] = gRSPfFogMin; #ifdef _DEBUG if( (pauseAtNext && 
(eventToPause == NEXT_VERTEX_CMD || eventToPause == NEXT_MATRIX_CMD || eventToPause == NEXT_FLUSH_TRI )) && logTriDetailsWithVertexMtx ) { DWORD *dat = (DWORD*)(&vert); DebuggerAppendMsg("vtx %d: %08X %08X %08X %08X", i, dat[0],dat[1],dat[2],dat[3]); DebuggerAppendMsg(" : %f, %f, %f, %f", g_vtxTransformed[i].x,g_vtxTransformed[i].y,g_vtxTransformed[i].z,g_vtxTransformed[i].w); DebuggerAppendMsg(" : %f, %f, %f, %f", g_vecProjected[i].x,g_vecProjected[i].y,g_vecProjected[i].z,g_vecProjected[i].w); } #endif RSP_Vtx_Clipping(i); if( gRSP.bLightingEnable ) { g_normal.x = (float)vert.norma.nx; g_normal.y = (float)vert.norma.ny; g_normal.z = (float)vert.norma.nz; if( status.isSSESupported ) { SSEVec3TransformNormal(); g_dwVecDiffuseCol[i] = SSELightVert(); } else { Vec3TransformNormal(g_normal, gRSPmodelViewTop); g_dwVecDiffuseCol[i] = LightVert(g_normal); } *(((BYTE*)&(g_dwVecDiffuseCol[i]))+3) = vert.rgba.a; // still use alpha from the vertex } else { if( (gRDP.geometryMode & G_SHADE) == 0 && gRSP.ucode < 5 ) //Shade is disabled { g_dwVecDiffuseCol[i] = gRDP.primitiveColor; } else //FLAT shade { g_dwVecDiffuseCol[i] = DAEDALUS_COLOR_RGBA(vert.rgba.r, vert.rgba.g, vert.rgba.b, vert.rgba.a); } } #ifdef _DEBUG if( options.bWinFrameMode ) { //g_vecProjected[i].z = 0; g_dwVecDiffuseCol[i] = DAEDALUS_COLOR_RGBA(vert.rgba.r, vert.rgba.g, vert.rgba.b, vert.rgba.a); } #endif if( gRDP.geometryMode & G_FOG ) { // Use fog factor to replace vertex alpha *(((BYTE*)&(g_dwVecDiffuseCol[i]))+3) = (BYTE)(g_fFogCoord[i]*255); } // Update texture coords n.b. 
need to divide tu/tv by bogus scale on addition to buffer VECTOR2 & t = g_vecTexture[i]; // If the vert is already lit, then there is no normal (and hence we // can't generate tex coord) if (gRSP.bTextureGen && gRSP.bLightingEnable ) { DaedalusMatrix & matWV = gRSP.modelviewMtxs[gRSP.modelViewMtxTop]; // Assign the spheremap's texture coordinates g_vecTexture[i].x = (0.5f * ( 1.0f + ( g_normal.x*matWV._11 + g_normal.y*matWV._21 + g_normal.z*matWV._31 ) )); g_vecTexture[i].y = (0.5f * ( 1.0f - ( g_normal.x*matWV._12 + g_normal.y*matWV._22 + g_normal.z*matWV._32 ) )); } else { g_vecTexture[i].x = (float)vert.tu; g_vecTexture[i].y = (float)vert.tv; } } StopProfiler(PROFILE_TRANSFORMATING); #ifdef _DEBUG if( (pauseAtNext && (eventToPause == NEXT_TRIANGLE || eventToPause == NEXT_FLUSH_TRI )) && logTriDetailsWithVertexMtx ) { DebuggerAppendMsg("Setting Vertexes: %d - %d\n", dwV0, dwV0+dwNum-1); } DEBUGGER_PAUSE_AND_DUMP(NEXT_VERTEX_CMD,{DebuggerAppendMsg("Paused at Vertex Cmd");}); #endif } typedef struct{ s16 y; s16 x; s16 flag; s16 z; } RS_Vtx_XYZ; typedef union { struct { BYTE a; BYTE b; BYTE g; BYTE r; }; struct { char na; char nz; //b char ny; //g char nx; //r }; } RS_Vtx_Color; void SetNewVertexInfo_Rogue_Squadron(DWORD dwXYZAddr, DWORD dwColorAddr, DWORD dwXYZCmd, DWORD dwColorCmd) { u32 dwV0 = 0; u32 dwNum = (dwXYZCmd&0xFF00)>>10; RS_Vtx_XYZ * pVtxXYZBase = (RS_Vtx_XYZ*)(g_pu8RamBase + dwXYZAddr); RS_Vtx_Color * pVtxColorBase = (RS_Vtx_Color*)(g_pu8RamBase + dwColorAddr); DWORD i; for (i = dwV0; i < dwV0 + dwNum; i++) { RS_Vtx_XYZ & vertxyz = pVtxXYZBase[i - dwV0]; RS_Vtx_Color & vertcolors = pVtxColorBase[i - dwV0]; g_vtxNonTransformed[i].x = (float)vertxyz.x; g_vtxNonTransformed[i].y = (float)vertxyz.y; g_vtxNonTransformed[i].z = (float)vertxyz.z; if( status.isSSEEnabled ) SSEVec3Transform(i); else { D3DXVec3Transform(&g_vtxTransformed[i], (D3DXVECTOR3*)&g_vtxNonTransformed[i], &gRSPworldProject); // Convert to w=1 g_vecProjected[i].w = 1.0f / 
g_vtxTransformed[i].w; g_vecProjected[i].x = g_vtxTransformed[i].x * g_vecProjected[i].w; g_vecProjected[i].y = g_vtxTransformed[i].y * g_vecProjected[i].w; g_vecProjected[i].z = g_vtxTransformed[i].z * g_vecProjected[i].w; } g_fFogCoord[i] = g_vecProjected[i].z; if( g_vecProjected[i].w < 0 || g_vecProjected[i].z < 0 || g_fFogCoord[i] < gRSPfFogMin ) g_fFogCoord[i] = gRSPfFogMin; #ifdef _DEBUG if( (pauseAtNext && (eventToPause == NEXT_VERTEX_CMD || eventToPause == NEXT_MATRIX_CMD || eventToPause == NEXT_FLUSH_TRI )) && logTriDetailsWithVertexMtx ) { DebuggerAppendMsg(" : %f, %f, %f, %f", g_vtxTransformed[i].x,g_vtxTransformed[i].y,g_vtxTransformed[i].z,g_vtxTransformed[i].w); DebuggerAppendMsg(" : %f, %f, %f, %f", g_vecProjected[i].x,g_vecProjected[i].y,g_vecProjected[i].z,g_vecProjected[i].w); } #endif RSP_Vtx_Clipping(i); if( gRSP.bLightingEnable ) { g_normal.x = (float)vertcolors.nx; g_normal.y = (float)vertcolors.ny; g_normal.z = (float)vertcolors.nz; if( status.isSSESupported ) { SSEVec3TransformNormal(); g_dwVecDiffuseCol[i] = SSELightVert(); } else { Vec3TransformNormal(g_normal, gRSPmodelViewTop); g_dwVecDiffuseCol[i] = LightVert(g_normal); } *(((BYTE*)&(g_dwVecDiffuseCol[i]))+3) = vertcolors.a; // still use alpha from the vertex } else { if( (gRDP.geometryMode & G_SHADE) == 0 && gRSP.ucode < 5 ) //Shade is disabled { g_dwVecDiffuseCol[i] = gRDP.primitiveColor; } else //FLAT shade { g_dwVecDiffuseCol[i] = DAEDALUS_COLOR_RGBA(vertcolors.r, vertcolors.g, vertcolors.b, vertcolors.a); } } #ifdef _DEBUG if( options.bWinFrameMode ) { g_dwVecDiffuseCol[i] = DAEDALUS_COLOR_RGBA(vertcolors.r, vertcolors.g, vertcolors.b, vertcolors.a); } #endif if( gRDP.geometryMode & G_FOG ) { // Use fog factor to replace vertex alpha *(((BYTE*)&(g_dwVecDiffuseCol[i]))+3) = (BYTE)(g_fFogCoord[i]*255); } /* // Update texture coords n.b. 
need to divide tu/tv by bogus scale on addition to buffer VECTOR2 & t = g_vecTexture[i]; // If the vert is already lit, then there is no normal (and hence we // can't generate tex coord) if (gRSP.bTextureGen && gRSP.bLightingEnable && g_textures[gRSP.curTile].m_bTextureEnable ) { DaedalusMatrix & matWV = gRSP.modelviewMtxs[gRSP.modelViewMtxTop]; // Assign the spheremap's texture coordinates t.x = (0.5f * ( 1.0f + ( g_normal.x*matWV._11 + g_normal.y*matWV._21 + g_normal.z*matWV._31 ) )); t.y = (0.5f * ( 1.0f - ( g_normal.x*matWV._12 + g_normal.y*matWV._22 + g_normal.z*matWV._32 ) )); } else { t.x = (float)vert.tu; t.y = (float)vert.tv; } */ } StopProfiler(PROFILE_TRANSFORMATING); #ifdef _DEBUG if( (pauseAtNext && (eventToPause == NEXT_TRIANGLE || eventToPause == NEXT_FLUSH_TRI )) && logTriDetailsWithVertexMtx ) { DebuggerAppendMsg("Setting Vertexes: %d - %d\n", dwV0, dwV0+dwNum-1); } DEBUGGER_PAUSE_AND_DUMP(NEXT_VERTEX_CMD,{TRACE0("Paused at Vertex Cmd");}); #endif } void SetLightCol(DWORD dwLight, DWORD dwCol) { gRSPlights[dwLight].r = (BYTE)((dwCol >> 24)&0xFF); gRSPlights[dwLight].g = (BYTE)((dwCol >> 16)&0xFF); gRSPlights[dwLight].b = (BYTE)((dwCol >> 8)&0xFF); gRSPlights[dwLight].a = 255; // Ignore light alpha gRSPlights[dwLight].fr = (float)gRSPlights[dwLight].r; gRSPlights[dwLight].fg = (float)gRSPlights[dwLight].g; gRSPlights[dwLight].fb = (float)gRSPlights[dwLight].b; gRSPlights[dwLight].fa = 255; // Ignore light alpha } void SetLightDirection(DWORD dwLight, float x, float y, float z) { register float w = (float)sqrt(x*x+y*y+z*z); gRSPlights[dwLight].x = x/w; gRSPlights[dwLight].y = y/w; gRSPlights[dwLight].z = z/w; } static float maxS0, maxT0; static float maxS1, maxT1; static bool validS0, validT0; static bool validS1, validT1; void LogTextureCoords(float fTex0S, float fTex0T, float fTex1S, float fTex1T) { if( validS0 ) { if( fTex0S<0 || fTex0S>maxS0 ) validS0 = false; } if( validT0 ) { if( fTex0T<0 || fTex0T>maxT0 ) validT0 = false; } if( validS1 ) { if( 
fTex1S<0 || fTex1S>maxS1 ) validS1 = false; } if( validT1 ) { if( fTex1T<0 || fTex1T>maxT1 ) validT1 = false; } } bool CheckTextureCoords(int tex) { if( tex==0 ) { return validS0&&validT0; } else { return validS1&&validT1; } } void ResetTextureCoordsLog(float maxs0, float maxt0, float maxs1, float maxt1) { maxS0 = maxs0; maxT0 = maxt0; maxS1 = maxs1; maxT1 = maxt1; validS0 = validT0 = true; validS1 = validT1 = true; } void ForceMainTextureIndex(int dwTile) { if( dwTile == 1 && !(CDaedalusRender::g_pRender->IsTexel0Enable()) && CDaedalusRender::g_pRender->IsTexel1Enable() ) { // Hack gRSP.curTile = 0; } else gRSP.curTile = dwTile; } #ifdef USING_INT_MATRIX // N64 integer matrix class N64IntMatrix& N64IntMatrix::operator *= ( const N64IntMatrix& mtx ) { int old_nums[16], old_decs[16]; register int i,j; for(i=0; i<16; i++) { old_nums[i] = nums[i]; old_nums[i] = decs[i]; } for( i=0; i<4; i++ ) { for( j=0; j<4; j++ ) { nums[index(i,j)] = old_nums[index(i,0)]*mtx.nums[index(0,j)] + old_nums[index(i,1)]*mtx.nums[index(1,j)] + old_nums[index(i,2)]*mtx.nums[index(2,j)] + old_nums[index(i,3)]*mtx.nums[index(3,j)]; nums[index(i,j)] += (old_nums[index(i,0)]*mtx.decs[index(0,j)] + old_nums[index(i,1)]*mtx.decs[index(1,j)] + old_nums[index(i,2)]*mtx.decs[index(2,j)] + old_nums[index(i,3)]*mtx.decs[index(3,j)])>>16; nums[index(i,j)] += (old_decs[index(i,0)]*mtx.nums[index(0,j)] + old_decs[index(i,1)]*mtx.nums[index(1,j)] + old_decs[index(i,2)]*mtx.nums[index(2,j)] + old_decs[index(i,3)]*mtx.nums[index(3,j)])>>16; decs[index(i,j)] = (old_nums[index(i,0)]*mtx.decs[index(0,j)] + old_nums[index(i,1)]*mtx.decs[index(1,j)] + old_nums[index(i,2)]*mtx.decs[index(2,j)] + old_nums[index(i,3)]*mtx.decs[index(3,j)])%65536; decs[index(i,j)] += (old_decs[index(i,0)]*mtx.nums[index(0,j)] + old_decs[index(i,1)]*mtx.nums[index(1,j)] + old_decs[index(i,2)]*mtx.nums[index(2,j)] + old_decs[index(i,3)]*mtx.nums[index(3,j)])%65536; decs[index(i,j)] += (old_decs[index(i,0)]*mtx.decs[index(0,j)] + 
old_decs[index(i,1)]*mtx.decs[index(1,j)] + old_decs[index(i,2)]*mtx.decs[index(2,j)] + old_decs[index(i,3)]*mtx.decs[index(3,j)]); } } return *this; } N64IntMatrix N64IntMatrix::operator * ( const N64IntMatrix& mtx ) const { //DaedalusMatrix a = this->convert(); //DaedalusMatrix b = mtx.convert(); //DaedalusMatrix c = a*b; N64IntMatrix newMtx; register i,j; int res0; int res1; int res2; u32 res3; int dec; //float org, res, dif; for( i=0; i<4; i++ ) { for( j=0; j<4; j++ ) { res0 = nums[i*4+0]*mtx.nums[0*4+j] + nums[i*4+1]*mtx.nums[1*4+j] + nums[i*4+2]*mtx.nums[2*4+j] + nums[i*4+3]*mtx.nums[3*4+j]; res1 = decs[i*4+0]*mtx.nums[0*4+j] + decs[i*4+1]*mtx.nums[1*4+j] + decs[i*4+2]*mtx.nums[2*4+j] + decs[i*4+3]*mtx.nums[3*4+j]; res2 = nums[i*4+0]*mtx.decs[0*4+j] + nums[i*4+1]*mtx.decs[1*4+j] + nums[i*4+2]*mtx.decs[2*4+j] + nums[i*4+3]*mtx.decs[3*4+j]; res3 = ((decs[i*4+0]*mtx.decs[0*4+j])>>16) + ((decs[i*4+1]*mtx.decs[1*4+j])>>16) + ((decs[i*4+2]*mtx.decs[2*4+j])>>16) + ((decs[i*4+3]*mtx.decs[3*4+j])>>16); newMtx.nums[i*4+j] = res0 + (res1+res2+(int)(res3))/65536; dec = (res1+res2+(int)(res3))%65536; if( dec < 0 ) { newMtx.nums[i*4+j]--; dec += 65536; } newMtx.decs[i*4+j] = dec; //org = *((&(c._11))+i*4+j); //res = newMtx.nums[i*4+j] + newMtx.decs[i*4+j]/65536.0f; //dif = org-res; //if( dif > 0.001 || dif < -0.001 ) //{ // dif = org-res; //}/ } } //DaedalusMatrix d = newMtx.convert(); //DaedalusMatrix e = c-d; return newMtx; } N64IntMatrix::N64IntMatrix( const N64IntMatrix& mtx ) { for(register int i=0; i<16; i++) { nums[i] = mtx.nums[i]; decs[i] = mtx.decs[i]; } } N64IntMatrix::N64IntMatrix( const short new_nums[16], const int new_decs[16]) { for(register int i=0; i<16; i++) { nums[i] = new_nums[i]; decs[i] = new_decs[i]; } } N64IntMatrix::N64IntMatrix(const WORD *new_nums, const WORD *new_decs) { for(register int i=0; i<16; i++) { nums[i] = (short)new_nums[i]; decs[i] = (int)new_decs[i]; } } N64IntMatrix::N64IntMatrix() { for( int i=0;i<16;i++ ) { nums[i] = 0; decs[i] = 
0; } nums[0]=nums[5]=nums[10]=nums[15]=1; } N64IntMatrix::N64IntMatrix(DWORD addr) { register int i,j; for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { short nDataHi = *(short *)(((addr+(i<<3)+(j<<1) )^0x2)); WORD nDataLo = *(WORD *)(((addr+(i<<3)+(j<<1) + 32)^0x2)); nums[(i<<2)+j] = nDataHi; decs[(i<<2)+j] = nDataLo; } } } DaedalusMatrix N64IntMatrix::convert() const { DaedalusMatrix mtx; for( int i=0; i<16; i++ ) { *(&(mtx._11)+i) = nums[i]+decs[i]/65536.0f; } return mtx; } #endif