; Musepack audio compression
; Copyright (C) 1999-2004 Buschmann/Klemm/Piecha/Wolf
;
; This library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public
; License as published by the Free Software Foundation; either
; version 2.1 of the License, or (at your option) any later version.
;
; This library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with this library; if not, write to the Free Software
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

;
; Syntax : NASM (Intel operand order), 32-bit code, MMX + 3DNow!.
; Calling: arguments are read from the stack above the return address and
;          every routine ends in a plain `ret` (the caller removes the
;          arguments).  ebx, esi, edi and ebp are saved and restored;
;          eax, ecx, edx, the flags and mm0..mm7 are clobbered.  `femms`
;          is executed before returning.
;
; Register-content comments below are written  low dword, high dword.
; A complex number (re, im) held in an MMX register therefore has
; re in the low dword and im in the high dword.
;

;-----------------------------------------------------------------------
; Contents of "tools.inc"  ((C) Ururi 1999), kept inline so that this
; unit carries its own helper macros.
;-----------------------------------------------------------------------

BITS 32

%ifdef WIN32
        %define _NAMING
        %define segment_code segment .text align=32 class=CODE use32
        %define segment_data segment .data align=32 class=DATA use32
%ifdef __BORLANDC__
        %define segment_bss  segment .data align=32 class=DATA use32
%else
        %define segment_bss  segment .bss align=32 class=DATA use32
%endif

%elifdef AOUT
        %define _NAMING
        %define segment_code segment .text
        %define segment_data segment .data
        %define segment_bss  segment .bss

%else
        %define segment_code segment .text align=32 class=CODE use32
        %define segment_data segment .data align=32 class=DATA use32
        %define segment_bss  segment .bss align=32 class=DATA use32
%endif

%define pmov    movq
%define pmovd   movd

%define pupldq  punpckldq
%define puphdq  punpckhdq
%define puplwd  punpcklwd
%define puphwd  punpckhwd

; globaldef / externdef: declare a symbol, prefixing it with '_' when the
; target uses underscore naming (_NAMING).
%imacro globaldef 1
        %ifdef _NAMING
                %define %1 _%1
        %endif
        global %1
%endmacro

%imacro externdef 1
        %ifdef _NAMING
                %define %1 _%1
        %endif
        extern %1
%endmacro

; proc NAME: open a procedure.  Exports both NAME and _NAME and resets the
; assemble-time bookkeeping:
;   %$STACK  = bytes currently pushed/allocated below the return address
;   %$STACKN = (negative) size of the declared locals
;   %$ARG    = stack offset of the next argument (4 = just above the
;              return address)
%imacro proc 1
        %push   proc
        global  _%1
        global  %1
_%1:
%1:
        %assign %$STACK 0
        %assign %$STACKN 0
        %assign %$ARG 4
%endmacro

; endproc: release what is still allocated, check the bookkeeping, return.
%imacro endproc 0
        %ifnctx proc
                %error expected 'proc' before 'endproc'.
        %else
                %if %$STACK > 0
                        add esp, %$STACK
                %endif

                %if %$STACK <> (-%$STACKN)
                        %error STACKLEVEL mismatch check 'local', 'alloc', 'pushd', 'popd'
                %endif

                ret
                %pop
        %endif
%endmacro

; sp(x): address of argument/local x, corrected for everything that
; pushd/alloc have put on the stack so far.
%idefine sp(a) esp+%$STACK+a

; LABEL arg SIZE: LABEL becomes the stack offset of the next argument.
%imacro arg 1
        %00     equ %$ARG
        %assign %$ARG %$ARG+%1
%endmacro

; LABEL local SIZE: LABEL becomes the (negative) offset of a new local.
%imacro local 1
        %assign %$STACKN %$STACKN-%1
        %00 equ %$STACKN
%endmacro

; alloc: reserve stack space for all locals declared so far.
%imacro alloc 0
        sub esp, (-%$STACKN)-%$STACK
        %assign %$STACK (-%$STACKN)
%endmacro

; pushd r1, r2, ...: push left to right and account for it in %$STACK.
%imacro pushd 1-*
        %rep %0
                push %1
                %assign %$STACK %$STACK+4
                %rotate 1
        %endrep
%endmacro

; popd r1, r2, ...: pop right to left (mirror of pushd with the same list).
%imacro popd 1-*
        %rep %0
                %rotate -1
                pop %1
                %assign %$STACK %$STACK-4
        %endrep
%endmacro

;-----------------------------------------------------------------------
; Addressing of the four butterfly inputs.  With eax = &a[j] and
; off1/off2/off3 = l, 2*l, 3*l floats in bytes:
;   [reg0] = a[j], [reg1] = a[j1], [reg2] = a[j2], [reg3] = a[j3]
;-----------------------------------------------------------------------
%define reg0            eax
%define reg1            eax + esi
%define reg2            eax + edx
%define reg3            eax + edi

%define off1            esi
%define off2            edx
%define off3            edi

segment_data

        align 32
negativ dd      0x80000000, 0           ; XOR mask: flips the sign of the low float only

; turn dst, tmp: exchange the low and the high dword of dst.
; tmp is clobbered; its previous contents do not matter.
%macro  turn    2
        punpckldq %2, %1                ; tmp = tmp.lo, dst.lo
        punpckhdq %1, %2                ; dst = dst.hi, dst.lo
%endmacro

;
; cftmdl ( const int n, const int l, float* a, float* w );
;
; The work of cftmdl is split over two entry points with the same argument
; list:
;   cftmdl_3DNow_1  handles j in [0, l) and j in [m, m + l), m = 4*l
;   cftmdl_3DNow_2  handles the remaining groups k = 2*m, 4*m, ... < n
;
segment_code

;-----------------------------------------------------------------------
; void cftmdl_3DNow_1 ( const int n, const int l, float* a, float* w )
;
; In   : n (not read by this routine), l, a, w on the stack
; Pre  : l is even and >= 2 (each loop runs l/2 times and is not guarded
;        against a zero count)
; Regs : eax = &a[j], ecx = remaining iterations, esi/edx/edi = off1..3,
;        ebp = w, mm7 = negativ, mm6 = (w[2], w[2]) in the second loop
;-----------------------------------------------------------------------
        align 32
proc    cftmdl_3DNow_1
        pushd   ebx, esi, edi, ebp
$n1     arg     4
$l1     arg     4
$a1     arg     4
$w1     arg     4

;   for ( j = 0; j < l; j += 2 ) {
;       j1       = j  + l;
;       j2       = j1 + l;
;       j3       = j2 + l;
;       x0r      = a[j]      + a[j1];
;       x0i      = a[j + 1]  + a[j1 + 1];
;       x1r      = a[j]      - a[j1];
;       x1i      = a[j + 1]  - a[j1 + 1];
;       x2r      = a[j2]     + a[j3];
;       x2i      = a[j2 + 1] + a[j3 + 1];
;       x3r      = a[j2]     - a[j3];
;       x3i      = a[j2 + 1] - a[j3 + 1];
;       a[j]     = x0r + x2r;
;       a[j + 1] = x0i + x2i;
;       a[j2]    = x0r - x2r;
;       a[j2 + 1]= x0i - x2i;
;       a[j1]    = x1r - x3i;
;       a[j1 + 1]= x1i + x3r;
;       a[j3]    = x1r + x3i;
;       a[j3 + 1]= x1i - x3r;
;   }

        pmov    mm7, qword [negativ]    ; sign mask for the low float
        mov     eax, [sp($a1)]          ; eax = a
        mov     ecx, [sp($l1)]          ; ecx = l

        lea     off1, [4*ecx]           ; l   floats in bytes
        lea     off2, [8*ecx]           ; 2*l floats in bytes
        lea     off3, [off1 + 8*ecx]    ; 3*l floats in bytes
        shr     ecx, 1                  ; one complex value per iteration
.loop_a:
        pmov    mm0, [reg0]
        pfadd   mm0, [reg1]             ; x0r, x0i
        pmov    mm1, [reg0]
        pfsub   mm1, [reg1]             ; x1r, x1i
        pmov    mm2, [reg2]
        pfadd   mm2, [reg3]             ; x2r, x2i
        pmov    mm3, [reg2]
        pfsub   mm3, [reg3]             ; x3r, x3i
        pmov    mm4, mm0
        pfadd   mm4, mm2
        pmov    [reg0], mm4             ; a[j]  = x0 + x2
        pfsub   mm0, mm2
        pmov    [reg2], mm0             ; a[j2] = x0 - x2
        turn    mm3, mm4                ;  x3i, x3r
        pxor    mm3, mm7                ; -x3i, x3r
        pmov    mm4, mm1
        pfadd   mm4, mm3
        pmov    [reg1], mm4             ; a[j1] = x1r - x3i, x1i + x3r
        pfsub   mm1, mm3
        pmov    [reg3], mm1             ; a[j3] = x1r + x3i, x1i - x3r
        add     eax, byte 8
        dec     ecx
        jnz     .loop_a

;   m    = l << 2;
;   wk1r = w[2];
;   for ( j = m; j < l + m; j += 2 ) {
;       j1       = j  + l;
;       j2       = j1 + l;
;       j3       = j2 + l;
;       x0r      = a[j]      + a[j1];
;       x0i      = a[j + 1]  + a[j1 + 1];
;       x1r      = a[j]      - a[j1];
;       x1i      = a[j + 1]  - a[j1 + 1];
;       x2r      = a[j2]     + a[j3];
;       x2i      = a[j2 + 1] + a[j3 + 1];
;       x3r      = a[j2]     - a[j3];
;       x3i      = a[j2 + 1] - a[j3 + 1];
;       a[j]     = x0r + x2r;
;       a[j + 1] = x0i + x2i;
;       a[j2]    =-(x0i - x2i);
;       a[j2 + 1]= x0r - x2r;
;       x0r      = x1r - x3i;
;       x0i      = x1i + x3r;
;       a[j1]    = wk1r * (x0r - x0i);
;       a[j1 + 1]= wk1r * (x0r + x0i);
;       x0r      = x3i + x1r;
;       x0i      = x3r - x1i;
;       a[j3]    = wk1r * (x0i - x0r);
;       a[j3 + 1]= wk1r * (x0i + x0r);
;   }

        mov     eax, [sp($a1)]          ; eax = a
        mov     ecx, [sp($l1)]          ; ecx = l
        mov     ebp, [sp($w1)]          ; ebp = w
        pmov    mm6, [ebp + 8]          ; mm6 = w[2], w[3]
        punpckldq mm6, mm6              ; mm6 = w[2], w[2]

        lea     eax, [eax + 8*ecx]
        lea     eax, [eax + 8*ecx]      ; eax = &a[4*l] = &a[m]

        lea     off1, [4*ecx]
        lea     off2, [8*ecx]
        lea     off3, [off1 + 8*ecx]
        shr     ecx, 1
.loop_b:
        pmov    mm0, [reg0]
        pfadd   mm0, [reg1]             ; x0r, x0i
        pmov    mm1, [reg0]
        pfsub   mm1, [reg1]             ; x1r, x1i
        pmov    mm2, [reg2]
        pfadd   mm2, [reg3]             ; x2r, x2i
        pmov    mm3, [reg2]
        pfsub   mm3, [reg3]             ; x3r, x3i
        pmov    mm4, mm0
        pfadd   mm4, mm2
        pmov    [reg0], mm4             ; a[j]  = x0 + x2
        pfsub   mm0, mm2                ; x0r - x2r, x0i - x2i
        turn    mm0, mm4
        pxor    mm0, mm7
        pmov    [reg2], mm0             ; a[j2] = -(x0i - x2i), x0r - x2r
        turn    mm3, mm4                ;  x3i, x3r
        pxor    mm3, mm7                ; -x3i, x3r
        pmov    mm4, mm1
        pfadd   mm4, mm3                ; r, i  with r = x1r - x3i, i = x1i + x3r
        pmov    mm5, mm4
        punpckldq mm4, mm4              ;  r, r
        punpckhdq mm5, mm5              ;  i, i
        pxor    mm5, mm7                ; -i, i
        pfadd   mm4, mm5                ; r - i, r + i
        pfmul   mm4, mm6
        pmov    [reg1], mm4             ; a[j1]

        pfsub   mm1, mm3                ; r, i  with r = x1r + x3i, i = x1i - x3r
        pmov    mm4, mm1
        punpckldq mm1, mm1              ;  r, r
        pxor    mm1, mm7                ; -r, r
        punpckhdq mm4, mm4              ;  i, i
        pfsubr  mm4, mm1                ; -r - i, r - i   (mm4 = mm1 - mm4)
        pfmul   mm4, mm6
        pmov    [reg3], mm4             ; a[j3]

        add     eax, byte 8
        dec     ecx
        jnz     near .loop_b

        femms
        popd    ebx, esi, edi, ebp
        ret


;-----------------------------------------------------------------------
; void cftmdl_3DNow_2 ( const int n, const int l, float* a, float* w )
;
; Handles the groups k = 2*m, 4*m, ... (k < n) of cftmdl, m = 4*l.
; Returns without touching a[] when 2*m >= n or when l/2 is zero.
;
; In   : n, l, a, w on the stack
; Pre  : l is even
; Regs : eax = &a[j]
;        ecx = remaining inner iterations
;        esi/edx/edi = off1..3 (l, 2*l, 3*l floats in bytes)
;        ebp = &w[k1], ebx = &w[2*k1]
;        mm6 = wk2, mm5 = wk1, mm4 = wk3, mm0..mm3 and mm7 = scratch
; Stack: [esp]     = l/2    (inner loop count)
;        [esp + 4] = &a[n]  (end of the outer loop)
;
; wk3 is kept with the sign convention of the formulas below, i.e.
; wk3i = wk1i - 2*wk2*wk1r, and a[j3] is formed accordingly.
;-----------------------------------------------------------------------
        align 32
proc    cftmdl_3DNow_2
        pushd   ebx, esi, edi, ebp
$n2     arg     4
$l2     arg     4
$a2     arg     4
$w2     arg     4

        mov     eax, [sp($a2)]          ; eax = a
        mov     ecx, [sp($l2)]          ; ecx = l
        mov     ebx, [sp($n2)]          ; ebx = n
        mov     ebp, [sp($w2)]          ; ebp = w        (k1 = 0)

        lea     ebx, [eax + 4*ebx]      ; ebx = &a[n]
        lea     off1, [4*ecx]
        lea     off2, [8*ecx]
        lea     off3, [off1 + 8*ecx]
        lea     eax, [eax + 8*off1]     ; eax = &a[8*l] = &a[2*m]   (k = 2*m)

        shr     ecx, 1                  ; ecx = l/2
        jz      near .done              ; nothing to do for an empty inner loop
        cmp     eax, ebx
        jae     near .done              ; k >= n: the outer loop does not run

        pushd   ebx, ecx                ; [esp + 4] = &a[n], [esp] = l/2
        mov     ebx, ebp                ; ebx = w        (2*k1 = 0)

;   for ( k = 2*m; k < n; k += 2*m ) {
;       k1  += 2;
;       wk2r = w[k1];
;       wk2i = w[k1 + 1];
;       wk1r = w[2*k1];
;       wk1i = w[2*k1 + 1];
;       wk3r = wk1r - 2 * wk2i * wk1i;
;       wk3i = wk1i - 2 * wk2i * wk1r;

.outer:
        add     ebp, byte 8             ; k1 += 2
        add     ebx, byte 16            ; 2*k1 follows
        pmov    mm6, [ebp]              ; mm6 = wk2r, wk2i
        pmov    mm5, [ebx]              ; mm5 = wk1r, wk1i
        pmov    mm4, mm6
        punpckhdq mm4, mm4              ; wk2i, wk2i
        pfadd   mm4, mm4                ; 2*wk2i, 2*wk2i
        pmov    mm3, mm5
        turn    mm3, mm2                ; wk1i, wk1r
        pfmul   mm4, mm3
        pfsubr  mm4, mm5                ; mm4 = wk3r, wk3i   (mm4 = mm5 - mm4)

;       j = k;
;       do {
;           j1       = j  + l;
;           j2       = j1 + l;
;           j3       = j2 + l;
;           x0r      = a[j]      + a[j1];
;           x0i      = a[j + 1]  + a[j1 + 1];
;           x1r      = a[j]      - a[j1];
;           x1i      = a[j + 1]  - a[j1 + 1];
;           x2r      = a[j2]     + a[j3];
;           x2i      = a[j2 + 1] + a[j3 + 1];
;           x3r      = a[j2]     - a[j3];
;           x3i      = a[j2 + 1] - a[j3 + 1];

        mov     ecx, [esp]              ; ecx = l/2
.first:
        pmov    mm0, [reg0]
        pfadd   mm0, [reg1]             ; x0r, x0i
        pmov    mm1, [reg0]
        pfsub   mm1, [reg1]             ; x1r, x1i
        pmov    mm2, [reg2]
        pfadd   mm2, [reg3]             ; x2r, x2i
        pmov    mm3, [reg2]
        pfsub   mm3, [reg3]             ; x3r, x3i

;           a[j]     = x0r + x2r;
;           a[j + 1] = x0i + x2i;

        pmov    mm7, mm0
        pfadd   mm7, mm2
        pmov    [reg0], mm7

;           x0r -= x2r;
;           x0i -= x2i;

        pfsub   mm0, mm2

;           a[j2]     = wk2r * x0r - wk2i * x0i;
;           a[j2 + 1] = wk2r * x0i + wk2i * x0r;

        pmov    mm2, mm0
        turn    mm2, mm7                ;  x0i, x0r
        pmov    mm7, mm6
        punpckhdq mm7, mm7              ; wk2i, wk2i
        pxor    mm2, [negativ]          ; -x0i, x0r
        pfmul   mm2, mm7                ; -wk2i*x0i, wk2i*x0r
        pmov    mm7, mm6
        punpckldq mm7, mm7              ; wk2r, wk2r
        pfmul   mm7, mm0                ;  wk2r*x0r, wk2r*x0i
        pfadd   mm2, mm7
        pmov    [reg2], mm2

;           x0r = x1r - x3i;
;           x0i = x1i + x3r;
;           x1r += x3i;
;           x1i -= x3r;

        turn    mm3, mm2                ;  x3i, x3r
        pxor    mm3, [negativ]          ; -x3i, x3r
        pmov    mm0, mm1
        pfadd   mm0, mm3                ; mm0 = x0r, x0i
        pfsub   mm1, mm3                ; mm1 = x1r, x1i

;           a[j1]     = wk1r * x0r - wk1i * x0i;
;           a[j1 + 1] = wk1r * x0i + wk1i * x0r;

        pmov    mm2, mm0
        turn    mm2, mm7                ;  x0i, x0r
        pmov    mm7, mm5
        punpckhdq mm7, mm7              ; wk1i, wk1i
        pxor    mm2, [negativ]          ; -x0i, x0r
        pfmul   mm2, mm7                ; -wk1i*x0i, wk1i*x0r
        pmov    mm7, mm5
        punpckldq mm7, mm7              ; wk1r, wk1r
        pfmul   mm7, mm0                ;  wk1r*x0r, wk1r*x0i
        pfadd   mm2, mm7
        pmov    [reg1], mm2

;           a[j3]     = wk3r * x1r + wk3i * x1i;
;           a[j3 + 1] = wk3r * x1i - wk3i * x1r;

        pmov    mm2, mm1
        turn    mm2, mm7                ;  x1i, x1r
        pmov    mm7, mm4
        punpckhdq mm7, mm7              ; wk3i, wk3i
        pxor    mm2, [negativ]          ; -x1i, x1r
        pfmul   mm2, mm7                ; -wk3i*x1i, wk3i*x1r
        pmov    mm7, mm4
        punpckldq mm7, mm7              ; wk3r, wk3r
        pfmul   mm7, mm1                ;  wk3r*x1r, wk3r*x1i
        pfsub   mm7, mm2                ; mm7 = mm7 - mm2
        pmov    [reg3], mm7

;       } while ( j += 2, j < l + k );

        add     eax, byte 8
        dec     ecx
        jnz     near .first

;       wk1r = w[2*k1 + 2];
;       wk1i = w[2*k1 + 3];
;       wk3r = wk1r - 2 * wk2r * wk1i;
;       wk3i = wk1i - 2 * wk2r * wk1r;

        pmov    mm5, [ebx + 8]          ; mm5 = wk1r, wk1i
        pmov    mm4, mm6
        punpckldq mm4, mm4              ; wk2r, wk2r
        pfadd   mm4, mm4                ; 2*wk2r, 2*wk2r
        pmov    mm3, mm5
        turn    mm3, mm2                ; wk1i, wk1r
        pfmul   mm4, mm3
        pfsubr  mm4, mm5                ; mm4 = wk3r, wk3i   (mm4 = mm5 - mm4)

;       j = k + m;
;       do {
;           j1, j2, j3 and x0 .. x3 as in the first loop

        add     eax, off3               ; &a[k + l] -> &a[k + 4*l] = &a[k + m]
        mov     ecx, [esp]              ; ecx = l/2
.second:
        pmov    mm0, [reg0]
        pfadd   mm0, [reg1]             ; x0r, x0i
        pmov    mm1, [reg0]
        pfsub   mm1, [reg1]             ; x1r, x1i
        pmov    mm2, [reg2]
        pfadd   mm2, [reg3]             ; x2r, x2i
        pmov    mm3, [reg2]
        pfsub   mm3, [reg3]             ; x3r, x3i

;           a[j]     = x0r + x2r;
;           a[j + 1] = x0i + x2i;

        pmov    mm7, mm0
        pfadd   mm7, mm2
        pmov    [reg0], mm7

;           x0r -= x2r;
;           x0i -= x2i;

        pfsub   mm0, mm2

;           a[j2]     = -wk2i * x0r - wk2r * x0i;
;           a[j2 + 1] = -wk2i * x0i + wk2r * x0r;

        pmov    mm2, mm0
        turn    mm2, mm7                ;  x0i, x0r
        pmov    mm7, mm6
        punpckldq mm7, mm7              ; wk2r, wk2r
        pxor    mm2, [negativ]          ; -x0i, x0r
        pfmul   mm2, mm7                ; -wk2r*x0i, wk2r*x0r
        pmov    mm7, mm6
        punpckhdq mm7, mm7              ; wk2i, wk2i
        pfmul   mm7, mm0                ;  wk2i*x0r, wk2i*x0i
        pfsub   mm2, mm7                ; mm2 = mm2 - mm7 (pfsubr would negate the result)
        pmov    [reg2], mm2

;           x0r = x1r - x3i;
;           x0i = x1i + x3r;
;           x1r += x3i;
;           x1i -= x3r;

        turn    mm3, mm2                ;  x3i, x3r
        pxor    mm3, [negativ]          ; -x3i, x3r
        pmov    mm0, mm1
        pfadd   mm0, mm3                ; mm0 = x0r, x0i
        pfsub   mm1, mm3                ; mm1 = x1r, x1i

;           a[j1]     = wk1r * x0r - wk1i * x0i;
;           a[j1 + 1] = wk1r * x0i + wk1i * x0r;

        pmov    mm2, mm0
        turn    mm2, mm7                ;  x0i, x0r
        pmov    mm7, mm5
        punpckhdq mm7, mm7              ; wk1i, wk1i
        pxor    mm2, [negativ]          ; -x0i, x0r
        pfmul   mm2, mm7                ; -wk1i*x0i, wk1i*x0r
        pmov    mm7, mm5
        punpckldq mm7, mm7              ; wk1r, wk1r
        pfmul   mm7, mm0                ;  wk1r*x0r, wk1r*x0i
        pfadd   mm2, mm7
        pmov    [reg1], mm2

;           a[j3]     = wk3r * x1r + wk3i * x1i;
;           a[j3 + 1] = wk3r * x1i - wk3i * x1r;

        pmov    mm2, mm1
        turn    mm2, mm7                ;  x1i, x1r
        pmov    mm7, mm4
        punpckhdq mm7, mm7              ; wk3i, wk3i
        pxor    mm2, [negativ]          ; -x1i, x1r
        pfmul   mm2, mm7                ; -wk3i*x1i, wk3i*x1r
        pmov    mm7, mm4
        punpckldq mm7, mm7              ; wk3r, wk3r
        pfmul   mm7, mm1                ;  wk3r*x1r, wk3r*x1i
        pfsub   mm7, mm2                ; mm7 = mm7 - mm2
        pmov    [reg3], mm7

;       } while ( j += 2, j < l + k + m );

        add     eax, byte 8
        dec     ecx
        jnz     near .second

;   }   k += 2*m

        add     eax, off3               ; &a[k + m + l] -> &a[k + 2*m]
        cmp     eax, [esp + 4]          ; next k < n ?
        jb      near .outer

        popd    ebx, ecx                ; drop the two loop slots (values are not needed)
.done:
        femms
        popd    ebx, esi, edi, ebp
        ret


;##################################################################