1 ; Musepack audio compression 2 ; Copyright (C) 1999-2004 Buschmann/Klemm/Piecha/Wolf 3 ; 4 ; This library is free software; you can redistribute it and/or 5 ; modify it under the terms of the GNU Lesser General Public 6 ; License as published by the Free Software Foundation; either 7 ; version 2.1 of the License, or (at your option) any later version. 8 ; 9 ; This library is distributed in the hope that it will be useful, 10 ; but WITHOUT ANY WARRANTY; without even the implied warranty of 11 ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 ; Lesser General Public License for more details. 13 ; 14 ; You should have received a copy of the GNU Lesser General Public 15 ; License along with this library; if not, write to the Free Software 16 ; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 18 ;%define USE_FXCH 19 20 %include "tools.inc" 21 <1> ; 22 <1> ; (C) Ururi 1999 23 <1> ; 24 <1> 25 <1> BITS 32 26 <1> 27 <1> %ifdef WIN32 28 <1> %define _NAMING 29 <1> %define segment_code segment .text align=32 class=CODE use32 30 <1> %define segment_data segment .data align=32 class=DATA use32 31 <1> %ifdef __BORLANDC__ 32 <1> %define segment_bss segment .data align=32 class=DATA use32 33 <1> %else 34 <1> %define segment_bss segment .bss align=32 class=DATA use32 35 <1> %endif 36 <1> 37 <1> %elifdef AOUT 38 <1> %define _NAMING 39 <1> %define segment_code segment .text 40 <1> %define segment_data segment .data 41 <1> %define segment_bss segment .bss 42 <1> 43 <1> %else 44 <1> %define segment_code segment .text align=32 class=CODE use32 45 <1> %define segment_data segment .data align=32 class=DATA use32 46 <1> %define segment_bss segment .bss align=32 class=DATA use32 47 <1> %endif 48 <1> 49 <1> %define pmov movq 50 <1> %define pmovd movd 51 <1> 52 <1> %define pupldq punpckldq 53 <1> %define puphdq punpckhdq 54 <1> %define puplwd punpcklwd 55 <1> %define puphwd punpckhwd 56 <1> 57 <1> %imacro globaldef 1 58 <1> %ifdef _NAMING 59 <1> 
%define %1 _%1 60 <1> %endif 61 <1> global %1 62 <1> %endmacro 63 <1> 64 <1> %imacro externdef 1 65 <1> %ifdef _NAMING 66 <1> %define %1 _%1 67 <1> %endif 68 <1> extern %1 69 <1> %endmacro 70 <1> 71 <1> %imacro proc 1 72 <1> %push proc 73 <1> global _%1 74 <1> global %1 75 <1> _%1: 76 <1> %1: 77 <1> %assign %$STACK 0 78 <1> %assign %$STACKN 0 79 <1> %assign %$ARG 4 80 <1> %endmacro 81 <1> 82 <1> %imacro endproc 0 83 <1> %ifnctx proc 84 <1> %error expected 'proc' before 'endproc'. 85 <1> %else 86 <1> %if %$STACK > 0 87 <1> add esp, %$STACK 88 <1> %endif 89 <1> 90 <1> %if %$STACK <> (-%$STACKN) 91 <1> %error STACKLEVEL mismatch check 'local', 'alloc', 'pushd', 'popd' 92 <1> %endif 93 <1> 94 <1> ret 95 <1> %pop 96 <1> %endif 97 <1> %endmacro 98 <1> 99 <1> %idefine sp(a) esp+%$STACK+a 100 <1> 101 <1> %imacro arg 1 102 <1> %00 equ %$ARG 103 <1> %assign %$ARG %$ARG+%1 104 <1> %endmacro 105 <1> 106 <1> %imacro local 1 107 <1> %assign %$STACKN %$STACKN-%1 108 <1> %00 equ %$STACKN 109 <1> %endmacro 110 <1> 111 <1> %imacro alloc 0 112 <1> sub esp, (-%$STACKN)-%$STACK 113 <1> %assign %$STACK (-%$STACKN) 114 <1> %endmacro 115 <1> 116 <1> %imacro pushd 1-* 117 <1> %rep %0 118 <1> push %1 119 <1> %assign %$STACK %$STACK+4 120 <1> %rotate 1 121 <1> %endrep 122 <1> %endmacro 123 <1> 124 <1> %imacro popd 1-* 125 <1> %rep %0 126 <1> %rotate -1 127 <1> pop %1 128 <1> %assign %$STACK %$STACK-4 129 <1> %endrep 130 <1> %endmacro 131 ; 132 133 ;************************************************************************* 134 135 segment_data 136 137 align 32 138 00000000 00000000000080BF00- Di_opt_SIMD dd 0.0, -1.0, -1.0, -1.0 139 00000009 0080BF000080BF 140 00000010 0000E8C10000F8C100- dd -29.0, -31.0, -35.0, -38.0 141 00000019 000CC2000018C2 142 00000020 0000554300005A4300- dd 213.0, 218.0, 222.0, 225.0 143 00000029 005E4300006143 144 00000030 0080E5C300C001C400- dd -459.0, -519.0, -581.0, -645.0 145 00000039 4011C4004021C4 146 00000040 00A0FE440000FA4400- dd 2037.0, 2000.0, 1952.0, 1893.0 
147 00000049 00F44400A0EC44 148 00000050 0008A1C50068ACC500- dd -5153.0, -5517.0, -5879.0, -6237.0 149 00000059 B8B7C500E8C2C5 150 00000060 0070CD450038BA4500- dd 6574.0, 5959.0, 5288.0, 4561.0 151 00000069 40A54500888E45 152 00000070 007112C700A819C700- dd -37489.0, -39336.0, -41176.0, -43006.0 153 00000079 D820C700FE27C7 154 00000080 008F92470078924700- dd 75038.0, 74992.0, 74856.0, 74630.0 155 00000089 34924700C39147 156 00000090 0071124700380B4700- dd 37489.0, 35640.0, 33791.0, 31947.0 157 00000099 FF03470096F946 158 000000A0 0070CD4500F0DE4500- dd 6574.0, 7134.0, 7640.0, 8092.0 159 000000A9 C0EE4500E0FC45 160 000000B0 0008A14500A0954500- dd 5153.0, 4788.0, 4425.0, 4063.0 161 000000B9 488A4500F07D45 162 000000C0 00A0FE4400F0004500- dd 2037.0, 2063.0, 2080.0, 2087.0 163 000000C9 00024500700245 164 000000D0 0080E5430080C84300- dd 459.0, 401.0, 347.0, 294.0 165 000000D9 80AD4300009343 166 000000E0 000055430000504300- dd 213.0, 208.0, 202.0, 196.0 167 000000E9 004A4300004443 168 000000F0 0000E8410000D04100- dd 29.0, 26.0, 24.0, 21.0 169 000000F9 00C0410000A841 170 171 00000100 000080BF000080BF00- dd -1.0, -1.0, -1.0, -2.0 172 00000109 0080BF000000C0 173 00000110 000024C2000034C200- dd -41.0, -45.0, -49.0, -53.0 174 00000119 0044C2000054C2 175 00000120 000063430000644300- dd 227.0, 228.0, 228.0, 227.0 176 00000129 00644300006343 177 00000130 00C031C400C042C400- dd -711.0, -779.0, -848.0, -919.0 178 00000139 0054C400C065C4 179 00000140 00C0E3440060D94400- dd 1822.0, 1739.0, 1644.0, 1535.0 180 00000149 80CD4400E0BF44 181 00000150 00E8CDC500B8D8C500- dd -6589.0, -6935.0, -7271.0, -7597.0 182 00000159 38E3C50068EDC5 183 00000160 00006C450070374500- dd 3776.0, 2935.0, 2037.0, 1082.0 184 00000169 A0FE4400408744 185 00000170 00152FC7001936C700- dd -44821.0, -46617.0, -48390.0, -50137.0 186 00000179 063DC700D943C7 187 00000180 80249147005A904780- dd 74313.0, 73908.0, 73415.0, 72835.0 188 00000189 638F4780418E47 189 00000190 0040EB460002DD4600- dd 30112.0, 28289.0, 26482.0, 
24694.0 190 00000199 E4CE4600ECC046 191 000001A0 00B0044600200A4600- dd 8492.0, 8840.0, 9139.0, 9389.0 192 000001A9 CC0E4600B41246 193 000001B0 009067450070514500- dd 3705.0, 3351.0, 3004.0, 2663.0 194 000001B9 C03B4500702645 195 000001C0 0050024500B0014500- dd 2085.0, 2075.0, 2057.0, 2032.0 196 000001C9 9000450000FE44 197 000001D0 000074430000454300- dd 244.0, 197.0, 153.0, 111.0 198 000001D9 0019430000DE42 199 000001E0 00003E430000374300- dd 190.0, 183.0, 176.0, 169.0 200 000001E9 00304300002943 201 000001F0 000098410000884100- dd 19.0, 17.0, 16.0, 14.0 202 000001F9 00804100006041 203 204 00000200 000000C0000000C000- dd -2.0, -2.0, -2.0, -3.0 205 00000209 0000C0000040C0 206 00000210 000068C200007CC200- dd -58.0, -63.0, -68.0, -73.0 207 00000219 0088C2000092C2 208 00000220 0000604300005D4300- dd 224.0, 221.0, 215.0, 208.0 209 00000229 00574300005043 210 00000230 00C077C4000085C400- dd -991.0, -1064.0, -1137.0, -1210.0 211 00000239 208EC4004097C4 212 00000240 00C0B0440000A04400- dd 1414.0, 1280.0, 1131.0, 970.0 213 00000249 608D4400807244 214 00000250 0030F7C5004400C600- dd -7910.0, -8209.0, -8491.0, -8755.0 215 00000259 AC04C600CC08C6 216 00000260 00008C42008079C400- dd 70.0, -998.0, -2122.0, -3300.0 217 00000269 A004C500404EC5 218 00000270 008D4AC7001E51C700- dd -51853.0, -53534.0, -55178.0, -56778.0 219 00000279 8A57C700CA5DC7 220 00000280 80F48C47007E8B4700- dd 72169.0, 71420.0, 70590.0, 69679.0 221 00000289 DF894780178847 222 00000290 0022B346008AA54600- dd 22929.0, 21189.0, 19478.0, 17799.0 223 00000299 2C9846000E8B46 224 000002A0 00E015460058184600- dd 9592.0, 9750.0, 9863.0, 9935.0 225 000002A9 1C1A46003C1B46 226 000002B0 00A0114500C0FA4400- dd 2330.0, 2006.0, 1692.0, 1388.0 227 000002B9 80D3440080AD44 228 000002C0 0020FA440040F54400- dd 2001.0, 1962.0, 1919.0, 1870.0 229 000002C9 E0EF4400C0E944 230 000002D0 000090420000104200- dd 72.0, 36.0, 2.0, -29.0 231 000002D9 0000400000E8C1 232 000002E0 0000214300001A4300- dd 161.0, 154.0, 147.0, 139.0 233 000002E9 
00134300000B43 234 000002F0 000050410000304100- dd 13.0, 11.0, 10.0, 9.0 235 000002F9 00204100001041 236 237 00000300 000040C0000080C000- dd -3.0, -4.0, -4.0, -5.0 238 00000309 0080C00000A0C0 239 00000310 00009EC20000AAC200- dd -79.0, -85.0, -91.0, -97.0 240 00000319 00B6C20000C2C2 241 00000320 0000484300003D4300- dd 200.0, 189.0, 177.0, 163.0 242 00000329 00314300002343 243 00000330 0060A0C40080A9C400- dd -1283.0, -1356.0, -1428.0, -1498.0 244 00000339 80B2C40040BBC4 245 00000340 008046440040174400- dd 794.0, 605.0, 402.0, 185.0 246 00000349 00C94300003943 247 00000350 00980CC6000C10C600- dd -8998.0, -9219.0, -9416.0, -9585.0 248 00000359 2013C600C415C6 249 00000360 00A88DC500D0B5C500- dd -4533.0, -5818.0, -7154.0, -8540.0 250 00000369 90DFC5007005C6 251 00000370 00DD63C700BE69C700- dd -58333.0, -59838.0, -61289.0, -62684.0 252 00000379 696FC700DC74C7 253 00000380 002A86478016844700- dd 68692.0, 67629.0, 66494.0, 65290.0 254 00000389 DF8147000A7F47 255 00000390 006C7C460050634600- dd 16155.0, 14548.0, 12980.0, 11455.0 256 00000399 D04A4600FC3246 257 000003A0 00B81B46009C1B4600- dd 9966.0, 9959.0, 9916.0, 9838.0 258 000003A9 F01A4600B81946 259 000003B0 00E0884400804B4400- dd 1095.0, 814.0, 545.0, 288.0 260 000003B9 40084400009043 261 000003C0 0020E34400E0DB4400- dd 1817.0, 1759.0, 1698.0, 1634.0 262 000003C9 40D4440040CC44 263 000003D0 000064C20000A6C200- dd -57.0, -83.0, -106.0, -127.0 264 000003D9 00D4C20000FEC2 265 000003E0 000004430000FA4200- dd 132.0, 125.0, 117.0, 111.0 266 000003E9 00EA420000DE42 267 000003F0 000000410000E04000- dd 8.0, 7.0, 7.0, 6.0 268 000003F9 00E0400000C040 269 270 00000400 0000A0C00000C0C000- dd -5.0, -6.0, -7.0, -7.0 271 00000409 00E0C00000E0C0 272 00000410 0000D0C20000DEC200- dd -104.0, -111.0, -117.0, -125.0 273 00000419 00EAC20000FAC2 274 00000420 000012430000FE4200- dd 146.0, 127.0, 106.0, 83.0 275 00000429 00D4420000A642 276 00000430 00E0C3C40040CCC400- dd -1567.0, -1634.0, -1698.0, -1759.0 277 00000439 40D4C400E0DBC4 278 00000440 
000034C2000090C300- dd -45.0, -288.0, -545.0, -814.0 279 00000449 4008C400804BC4 280 00000450 00FC17C600B819C600- dd -9727.0, -9838.0, -9916.0, -9959.0 281 00000459 F01AC6009C1BC6 282 00000460 00DC1BC600FC32C600- dd -9975.0, -11455.0, -12980.0, -14548.0 283 00000469 D04AC6005063C6 284 00000470 00137AC7000A7FC700- dd -64019.0, -65290.0, -66494.0, -67629.0 285 00000479 DF81C7801684C7 286 00000480 00137A4700DC744700- dd 64019.0, 62684.0, 61289.0, 59838.0 287 00000489 696F4700BE6947 288 00000490 00DC1B460070054600- dd 9975.0, 8540.0, 7154.0, 5818.0 289 00000499 90DF4500D0B545 290 000004A0 00FC174600C4154600- dd 9727.0, 9585.0, 9416.0, 9219.0 291 000004A9 201346000C1046 292 000004B0 00003442000039C300- dd 45.0, -185.0, -402.0, -605.0 293 000004B9 00C9C3004017C4 294 000004C0 00E0C3440040BB4400- dd 1567.0, 1498.0, 1428.0, 1356.0 295 000004C9 80B2440080A944 296 000004D0 000012C3000023C300- dd -146.0, -163.0, -177.0, -189.0 297 000004D9 0031C300003DC3 298 000004E0 0000D0420000C24200- dd 104.0, 97.0, 91.0, 85.0 299 000004E9 00B6420000AA42 300 000004F0 0000A0400000A04000- dd 5.0, 5.0, 4.0, 4.0 301 000004F9 00804000008040 302 303 00000500 000000C1000010C100- dd -8.0, -9.0, -10.0, -11.0 304 00000509 0020C1000030C1 305 00000510 000004C300000BC300- dd -132.0, -139.0, -147.0, -154.0 306 00000519 0013C300001AC3 307 00000520 000064420000E84100- dd 57.0, 29.0, -2.0, -36.0 308 00000529 0000C0000010C2 309 00000530 0020E3C400C0E9C400- dd -1817.0, -1870.0, -1919.0, -1962.0 310 00000539 E0EFC40040F5C4 311 00000540 00E088C40080ADC400- dd -1095.0, -1388.0, -1692.0, -2006.0 312 00000549 80D3C400C0FAC4 313 00000550 00B81BC6003C1BC600- dd -9966.0, -9935.0, -9863.0, -9750.0 314 00000559 1C1AC6005818C6 315 00000560 006C7CC6000E8BC600- dd -16155.0, -17799.0, -19478.0, -21189.0 316 00000569 2C98C6008AA5C6 317 00000570 002A86C7801788C700- dd -68692.0, -69679.0, -70590.0, -71420.0 318 00000579 DF89C7007E8BC7 319 00000580 00DD634700CA5D4700- dd 58333.0, 56778.0, 55178.0, 53534.0 320 00000589 
8A5747001E5147 321 00000590 00A88D4500404E4500- dd 4533.0, 3300.0, 2122.0, 998.0 322 00000599 A0044500807944 323 000005A0 00980C4600CC084600- dd 8998.0, 8755.0, 8491.0, 8209.0 324 000005A9 AC044600440046 325 000005B0 008046C4008072C400- dd -794.0, -970.0, -1131.0, -1280.0 326 000005B9 608DC40000A0C4 327 000005C0 0060A0440040974400- dd 1283.0, 1210.0, 1137.0, 1064.0 328 000005C9 208E4400008544 329 000005D0 000048C3000050C300- dd -200.0, -208.0, -215.0, -221.0 330 000005D9 0057C300005DC3 331 000005E0 00009E420000924200- dd 79.0, 73.0, 68.0, 63.0 332 000005E9 00884200007C42 333 000005F0 000040400000404000- dd 3.0, 3.0, 2.0, 2.0 334 000005F9 00004000000040 335 336 00000600 000050C1000060C100- dd -13.0, -14.0, -16.0, -17.0 337 00000609 0080C1000088C1 338 00000610 000021C3000029C300- dd -161.0, -169.0, -176.0, -183.0 339 00000619 0030C3000037C3 340 00000620 000090C20000DEC200- dd -72.0, -111.0, -153.0, -197.0 341 00000629 0019C3000045C3 342 00000630 0020FAC40000FEC400- dd -2001.0, -2032.0, -2057.0, -2075.0 343 00000639 9000C500B001C5 344 00000640 00A011C5007026C500- dd -2330.0, -2663.0, -3004.0, -3351.0 345 00000649 C03BC5007051C5 346 00000650 00E015C600B412C600- dd -9592.0, -9389.0, -9139.0, -8840.0 347 00000659 CC0EC600200AC6 348 00000660 0022B3C600ECC0C600- dd -22929.0, -24694.0, -26482.0, -28289.0 349 00000669 E4CEC60002DDC6 350 00000670 80F48CC780418EC780- dd -72169.0, -72835.0, -73415.0, -73908.0 351 00000679 638FC7005A90C7 352 00000680 008D4A4700D9434700- dd 51853.0, 50137.0, 48390.0, 46617.0 353 00000689 063D4700193647 354 00000690 00008CC2004087C400- dd -70.0, -1082.0, -2037.0, -2935.0 355 00000699 A0FEC4007037C5 356 000006A0 0030F7450068ED4500- dd 7910.0, 7597.0, 7271.0, 6935.0 357 000006A9 38E34500B8D845 358 000006B0 00C0B0C400E0BFC400- dd -1414.0, -1535.0, -1644.0, -1739.0 359 000006B9 80CDC40060D9C4 360 000006C0 00C0774400C0654400- dd 991.0, 919.0, 848.0, 779.0 361 000006C9 00544400C04244 362 000006D0 000060C3000063C300- dd -224.0, -227.0, -228.0, -228.0 363 
000006D9 0064C3000064C3 364 000006E0 000068420000544200- dd 58.0, 53.0, 49.0, 45.0 365 000006E9 00444200003442 366 000006F0 000000400000004000- dd 2.0, 2.0, 1.0, 1.0 367 000006F9 00803F0000803F 368 369 00000700 000098C10000A8C100- dd -19.0, -21.0, -24.0, -26.0 370 00000709 00C0C10000D0C1 371 00000710 00003EC3000044C300- dd -190.0, -196.0, -202.0, -208.0 372 00000719 004AC3000050C3 373 00000720 000074C3000093C300- dd -244.0, -294.0, -347.0, -401.0 374 00000729 80ADC30080C8C3 375 00000730 005002C5007002C500- dd -2085.0, -2087.0, -2080.0, -2063.0 376 00000739 0002C500F000C5 377 00000740 009067C500F07DC500- dd -3705.0, -4063.0, -4425.0, -4788.0 378 00000749 488AC500A095C5 379 00000750 00B004C600E0FCC500- dd -8492.0, -8092.0, -7640.0, -7134.0 380 00000759 C0EEC500F0DEC5 381 00000760 0040EBC60096F9C600- dd -30112.0, -31947.0, -33791.0, -35640.0 382 00000769 FF03C700380BC7 383 00000770 802491C700C391C700- dd -74313.0, -74630.0, -74856.0, -74992.0 384 00000779 3492C7007892C7 385 00000780 00152F4700FE274700- dd 44821.0, 43006.0, 41176.0, 39336.0 386 00000789 D8204700A81947 387 00000790 00006CC500888EC500- dd -3776.0, -4561.0, -5288.0, -5959.0 388 00000799 40A5C50038BAC5 389 000007A0 00E8CD4500E8C24500- dd 6589.0, 6237.0, 5879.0, 5517.0 390 000007A9 B8B7450068AC45 391 000007B0 00C0E3C400A0ECC400- dd -1822.0, -1893.0, -1952.0, -2000.0 392 000007B9 00F4C40000FAC4 393 000007C0 00C031440040214400- dd 711.0, 645.0, 581.0, 519.0 394 000007C9 40114400C00144 395 000007D0 000063C3000061C300- dd -227.0, -225.0, -222.0, -218.0 396 000007D9 005EC300005AC3 397 000007E0 000024420000184200- dd 41.0, 38.0, 35.0, 31.0 398 000007E9 000C420000F841 399 000007F0 0000803F0000803F00- dd 1.0, 1.0, 1.0, 1.0 400 000007F9 00803F0000803F 401 402 externdef Di_opt 403 <1> %ifdef _NAMING 404 <1> %define %1 _%1 405 <1> %endif 406 <1> extern %1 407 408 %define C00 0.500000000000000000000000 409 %define C01 0.500602998235196301334178 410 %define C02 0.502419286188155705518560 411 %define C03 
0.505470959897543659956626 412 %define C04 0.509795579104159168925062 413 %define C05 0.515447309922624546962323 414 %define C06 0.522498614939688880640101 415 %define C07 0.531042591089784174473998 416 %define C08 0.541196100146196984405269 417 %define C09 0.553103896034444527838540 418 %define C10 0.566944034816357703685831 419 %define C11 0.582934968206133873665654 420 %define C12 0.601344886935045280535340 421 %define C13 0.622504123035664816182728 422 %define C14 0.646821783359990129535794 423 %define C15 0.674808341455005746033820 424 %define C16 0.707106781186547524436104 425 %define C17 0.744536271002298449773679 426 %define C18 0.788154623451250224773056 427 %define C19 0.839349645415527038721463 428 %define C20 0.899976223136415704611808 429 %define C21 0.972568237861960693780520 430 %define C22 1.060677685990347471323668 431 %define C23 1.169439933432884955134476 432 %define C24 1.306562964876376527851784 433 %define C25 1.484164616314166277319733 434 %define C26 1.722447098238333927796261 435 %define C27 2.057781009953411550808880 436 %define C28 2.562915447741506178719328 437 %define C29 3.407608418468718785698107 438 %define C30 5.101148618689163857960189 439 %define C31 10.190008123548056810994678 440 441 align 32 442 00000800 460A23C142165AC0 dd -C31, -C29 ; -C(31),-C(29) 443 00000808 1BF9BDBFAFB203C0 dd -C25, -C27 ; -C(25),-C(27) 444 00000810 EE993EBF9EDF56BF dd -C17, -C19 ; -C(17),-C(19) 445 00000818 35B095BF3BFA78BF dd -C23, -C21 ; -C(23),-C(21) 446 00000820 0000803F0000803FD4- dd 1.0, 1.0, -C08, -C24 ; CM110824 = 1, 1, -C( 8), -C(24) 447 00000829 8B0ABF753DA7BF 448 00000830 0000803F0000803FD4- dd 1.0, 1.0, C08, C24 ; CP110824 = 1, 1, C( 8), C(24) 449 00000839 8B0A3F753DA73F 450 00000840 0000803FF30435BF00- dd 1.0, -C16, 1.0, -C16 ; CM116116 = 1, -C(16), 1, -C(16) 451 00000849 00803FF30435BF 452 00000850 0000803FF304353F00- dd 1.0, C16, 1.0, C16 ; CP116116 = 1, C(16), 1, C(16) 453 00000859 00803FF304353F 454 00000860 F304353FF30435BF C dd C16, 
-C16 ; C(16),-C(16) 455 00000868 D48B0A3F753DA73F dd C08, C24 ; C( 8), C(24) 456 00000870 F781023FBDF1193F dd C04, C12 ; C( 4), C(12) 457 00000878 CF062440D764663F dd C28, C20 ; C(28), C(20) 458 00000880 8D9E003F78C2053F dd C02, C06 ; C( 2), C( 6) 459 00000888 1D96253F3F23113F dd C14, C10 ; C(14), C(10) 460 00000890 9C3CA3402679DC3F dd C30, C26 ; C(30), C(26) 461 00000898 80C4493F49C4873F dd C18, C22 ; C(18), C(22) 462 000008A0 8527003F8B66013F dd C01, C03 ; C( 1), C( 3) 463 000008A8 68F2073F5BF4033F dd C07, C05 ; C( 7), C( 5) 464 000008B0 3DC02C3F6E5C1F3F dd C15, C13 ; C(15), C(13) 465 000008B8 38980D3F3A3B153F dd C09, C11 ; C( 9), C(11) 466 000008C0 460A234142165A40 dd C31, C29 ; C(31), C(29) 467 000008C8 1BF9BD3FAFB20340 dd C25, C27 ; C(25), C(27) 468 000008D0 EE993E3F9EDF563F dd C17, C19 ; C(17), C(19) 469 000008D8 35B0953F3BFA783F dd C23, C21 ; C(23), C(21) 470 471 %undef C00 472 %undef C01 473 %undef C02 474 %undef C03 475 %undef C04 476 %undef C05 477 %undef C06 478 %undef C07 479 %undef C08 480 %undef C09 481 %undef C10 482 %undef C11 483 %undef C12 484 %undef C13 485 %undef C14 486 %undef C15 487 %undef C16 488 %undef C17 489 %undef C18 490 %undef C19 491 %undef C20 492 %undef C21 493 %undef C22 494 %undef C23 495 %undef C24 496 %undef C25 497 %undef C26 498 %undef C27 499 %undef C28 500 %undef C29 501 %undef C30 502 %undef C31 503 504 ; eax hat auf C zu zeigen, dann können die folgenden 505 ; Makros zum Zugriff auf die Konstanten benutzt werden 506 507 %define CM31 [ eax - 12*8 ] 508 %define CM17 [ eax - 10*8 ] 509 %define CM110824 [ eax - 8*8 ] 510 %define CP110824 [ eax - 6*8 ] 511 %define CM116116 [ eax - 4*8 ] 512 %define CP116116 [ eax - 2*8 ] 513 %define CC04 [ eax + 2*8 ] 514 %define CC02 [ eax + 4*8 ] 515 %define CC30 [ eax + 6*8 ] 516 %define CC01 [ eax + 8*8 ] 517 %define CC15 [ eax + 10*8 ] 518 519 %define C16 qword [ eax + 0*8 ] 520 %define C08 qword [ eax + 1*8 ] 521 %define C04 qword [ eax + 2*8 ] 522 %define C28 qword [ eax + 3*8 ] 523 
%define C02 qword [ eax + 4*8 ] 524 %define C14 qword [ eax + 5*8 ] 525 %define C30 qword [ eax + 6*8 ] 526 %define C18 qword [ eax + 7*8 ] 527 %define C01 qword [ eax + 8*8 ] 528 %define C07 qword [ eax + 9*8 ] 529 %define C15 qword [ eax + 10*8 ] 530 %define C09 qword [ eax + 11*8 ] 531 %define C31 qword [ eax + 12*8 ] 532 %define C25 qword [ eax + 13*8 ] 533 %define C17 qword [ eax + 14*8 ] 534 %define C23 qword [ eax + 15*8 ] 535 536 %define _C16 dword [ eax + 0*4 ] 537 %define _C08 dword [ eax + 2*4 ] 538 %define _C24 dword [ eax + 3*4 ] 539 %define _C04 dword [ eax + 4*4 ] 540 %define _C12 dword [ eax + 5*4 ] 541 %define _C28 dword [ eax + 6*4 ] 542 %define _C20 dword [ eax + 7*4 ] 543 %define _C02 dword [ eax + 8*4 ] 544 %define _C06 dword [ eax + 9*4 ] 545 %define _C14 dword [ eax + 10*4 ] 546 %define _C10 dword [ eax + 11*4 ] 547 %define _C30 dword [ eax + 12*4 ] 548 %define _C26 dword [ eax + 13*4 ] 549 %define _C18 dword [ eax + 14*4 ] 550 %define _C22 dword [ eax + 15*4 ] 551 %define _C01 dword [ eax + 16*4 ] 552 %define _C03 dword [ eax + 17*4 ] 553 %define _C07 dword [ eax + 18*4 ] 554 %define _C05 dword [ eax + 19*4 ] 555 %define _C15 dword [ eax + 20*4 ] 556 %define _C13 dword [ eax + 21*4 ] 557 %define _C09 dword [ eax + 22*4 ] 558 %define _C11 dword [ eax + 23*4 ] 559 %define _C31 dword [ eax + 24*4 ] 560 %define _C29 dword [ eax + 25*4 ] 561 %define _C25 dword [ eax + 26*4 ] 562 %define _C27 dword [ eax + 27*4 ] 563 %define _C17 dword [ eax + 28*4 ] 564 %define _C19 dword [ eax + 29*4 ] 565 %define _C23 dword [ eax + 30*4 ] 566 %define _C21 dword [ eax + 31*4 ] 567 568 align 32 569 000008E0 000000470000004700- bias dd 32768.0, 32768.0, 32768.0, 32768.0 570 000008E9 00004700000047 571 ;bias2 dd 1097364144128.0, 1097364144128.0, 1097364144128.0, 1097364144128.0 572 000008F0 00807D5300807D5300- bias2 dd 1088774209536.0, 1088774209536.0, 1088774209536.0, 1088774209536.0 573 000008F9 807D5300807D53 574 00000900 000000800000008000- negativ dd 
0x80000000, 0x80000000, 0x80000000, 0x80000000 575 00000909 00008000000080 576 ; dd 0x80000000, 0x80000000, 0x80000000, 0x80000000 ; is line this necessary? 577 00000910 00807D4B bias3 dd 16613376.0 578 579 ;*************************************************************************** 580 581 %macro muladdi 2 582 fld dword [edx+4*(%2)] 583 fmul dword [ecx+4*(%1)] 584 faddp st1 585 %endmacro 586 587 segment_code 588 align 32 589 00000000 90 times 7 nop 590 proc VectorMult_i387 591 <1> %push proc 592 <1> global _%1 593 <1> global %1 594 <1> _%1: 595 <1> %1: 596 <1> %assign %$STACK 0 597 <1> %assign %$STACKN 0 598 <1> %assign %$ARG 4 599 $buff0 arg 4 600 <1> $buff0 equ %$ARG 601 <1> %assign %$ARG %$ARG+%1 602 $V0 arg 4 603 <1> $V0 equ %$ARG 604 <1> %assign %$ARG %$ARG+%1 605 pushd ebx 606 <1> %rep %0 607 <1> push %1 608 <1> %assign %$STACK %$STACK+4 609 <1> %rotate 1 610 <1> %endrep 611 00000007 53 <2> push %1 612 <2> %assign %$STACK %$STACK+4 613 <2> %rotate 1 614 00000008 8B5C2408 mov ebx, [sp($buff0)] 615 0000000C 8B4C240C mov ecx, [sp($V0)] 616 00000010 BA[00000000] mov edx, Di_opt_SIMD 617 00000015 B820000000 mov eax, 32 618 0000001A D905[10090000] fld dword [bias3] 619 lbl9: 620 00000020 D9C0 fld st0 621 %ifndef USE_FXCH 622 muladdi 0, 0 623 00000022 D902 <1> fld dword [edx+4*(%2)] 624 00000024 D809 <1> fmul dword [ecx+4*(%1)] 625 00000026 DEC1 <1> faddp st1 626 muladdi 96, 1 627 00000028 D94204 <1> fld dword [edx+4*(%2)] 628 0000002B D88980010000 <1> fmul dword [ecx+4*(%1)] 629 00000031 DEC1 <1> faddp st1 630 muladdi 128, 2 631 00000033 D94208 <1> fld dword [edx+4*(%2)] 632 00000036 D88900020000 <1> fmul dword [ecx+4*(%1)] 633 0000003C DEC1 <1> faddp st1 634 muladdi 224, 3 635 0000003E D9420C <1> fld dword [edx+4*(%2)] 636 00000041 D88980030000 <1> fmul dword [ecx+4*(%1)] 637 00000047 DEC1 <1> faddp st1 638 muladdi 256, 4 639 00000049 D94210 <1> fld dword [edx+4*(%2)] 640 0000004C D88900040000 <1> fmul dword [ecx+4*(%1)] 641 00000052 DEC1 <1> faddp st1 642 
muladdi 352, 5 643 00000054 D94214 <1> fld dword [edx+4*(%2)] 644 00000057 D88980050000 <1> fmul dword [ecx+4*(%1)] 645 0000005D DEC1 <1> faddp st1 646 muladdi 384, 6 647 0000005F D94218 <1> fld dword [edx+4*(%2)] 648 00000062 D88900060000 <1> fmul dword [ecx+4*(%1)] 649 00000068 DEC1 <1> faddp st1 650 muladdi 480, 7 651 0000006A D9421C <1> fld dword [edx+4*(%2)] 652 0000006D D88980070000 <1> fmul dword [ecx+4*(%1)] 653 00000073 DEC1 <1> faddp st1 654 muladdi 512, 8 655 00000075 D94220 <1> fld dword [edx+4*(%2)] 656 00000078 D88900080000 <1> fmul dword [ecx+4*(%1)] 657 0000007E DEC1 <1> faddp st1 658 muladdi 608, 9 659 00000080 D94224 <1> fld dword [edx+4*(%2)] 660 00000083 D88980090000 <1> fmul dword [ecx+4*(%1)] 661 00000089 DEC1 <1> faddp st1 662 muladdi 640, 10 663 0000008B D94228 <1> fld dword [edx+4*(%2)] 664 0000008E D889000A0000 <1> fmul dword [ecx+4*(%1)] 665 00000094 DEC1 <1> faddp st1 666 muladdi 736, 11 667 00000096 D9422C <1> fld dword [edx+4*(%2)] 668 00000099 D889800B0000 <1> fmul dword [ecx+4*(%1)] 669 0000009F DEC1 <1> faddp st1 670 muladdi 768, 12 671 000000A1 D94230 <1> fld dword [edx+4*(%2)] 672 000000A4 D889000C0000 <1> fmul dword [ecx+4*(%1)] 673 000000AA DEC1 <1> faddp st1 674 muladdi 864, 13 675 000000AC D94234 <1> fld dword [edx+4*(%2)] 676 000000AF D889800D0000 <1> fmul dword [ecx+4*(%1)] 677 000000B5 DEC1 <1> faddp st1 678 muladdi 896, 14 679 000000B7 D94238 <1> fld dword [edx+4*(%2)] 680 000000BA D889000E0000 <1> fmul dword [ecx+4*(%1)] 681 000000C0 DEC1 <1> faddp st1 682 muladdi 992, 15 683 000000C2 D9423C <1> fld dword [edx+4*(%2)] 684 000000C5 D889800F0000 <1> fmul dword [ecx+4*(%1)] 685 000000CB DEC1 <1> faddp st1 686 %else 687 fld dword [ecx+4* 0] 688 fmul dword [edx+4* 0] ; prod accu 689 fld dword [ecx+4* 96] ; s1 prod accu 690 fxch st1 ; prod s1 accu 691 faddp st2 ; s1 accu 692 fmul dword [edx+4* 1] ; prod accu 693 fld dword [ecx+4*128] ; s1 prod accu 694 fxch st1 ; prod s1 accu 695 faddp st2 ; s1 accu 696 fmul dword [edx+4* 2] ; 
prod accu 697 fld dword [ecx+4*224] ; s1 prod accu 698 fxch st1 ; prod s1 accu 699 faddp st2 ; s1 accu 700 fmul dword [edx+4* 3] ; prod accu 701 fld dword [ecx+4*256] ; s1 prod accu 702 fxch st1 ; prod s1 accu 703 faddp st2 ; s1 accu 704 fmul dword [edx+4* 4] ; prod accu 705 fld dword [ecx+4*352] ; s1 prod accu 706 fxch st1 ; prod s1 accu 707 faddp st2 ; s1 accu 708 fmul dword [edx+4* 5] ; prod accu 709 fld dword [ecx+4*384] ; s1 prod accu 710 fxch st1 ; prod s1 accu 711 faddp st2 ; s1 accu 712 fmul dword [edx+4* 6] ; prod accu 713 fld dword [ecx+4*480] ; s1 prod accu 714 fxch st1 ; prod s1 accu 715 faddp st2 ; s1 accu 716 fmul dword [edx+4* 7] ; prod accu 717 fld dword [ecx+4*512] ; s1 prod accu 718 fxch st1 ; prod s1 accu 719 faddp st2 ; s1 accu 720 fmul dword [edx+4* 8] ; prod accu 721 fld dword [ecx+4*608] ; s1 prod accu 722 fxch st1 ; prod s1 accu 723 faddp st2 ; s1 accu 724 fmul dword [edx+4* 9] ; prod accu 725 fld dword [ecx+4*640] ; s1 prod accu 726 fxch st1 ; prod s1 accu 727 faddp st2 ; s1 accu 728 fmul dword [edx+4*10] ; prod accu 729 fld dword [ecx+4*736] ; s1 prod accu 730 fxch st1 ; prod s1 accu 731 faddp st2 ; s1 accu 732 fmul dword [edx+4*11] ; prod accu 733 fld dword [ecx+4*768] ; s1 prod accu 734 fxch st1 ; prod s1 accu 735 faddp st2 ; s1 accu 736 fmul dword [edx+4*12] ; prod accu 737 fld dword [ecx+4*864] ; s1 prod accu 738 fxch st1 ; prod s1 accu 739 faddp st2 ; s1 accu 740 fmul dword [edx+4*13] ; prod accu 741 fld dword [ecx+4*896] ; s1 prod accu 742 fxch st1 ; prod s1 accu 743 faddp st2 ; s1 accu 744 fmul dword [edx+4*14] ; prod accu 745 fld dword [ecx+4*992] ; s1 prod accu 746 fxch st1 ; prod s1 accu 747 faddp st2 ; s1 accu 748 fmul dword [edx+4*15] ; prod accu 749 faddp st1 ; accu 750 %endif 751 000000CD 8D5240 lea edx, [edx + 64] 752 000000D0 8D4904 lea ecx, [ecx + 4] 753 754 000000D3 D91B fstp dword [ebx] 755 000000D5 8D5B04 lea ebx, [ebx + 4] 756 000000D8 48 dec eax 757 000000D9 0F8541FFFFFF jnz near lbl9 758 759 000000DF DDD8 fstp st0 
760 761 popd ebx 762 <1> %rep %0 763 <1> %rotate -1 764 <1> pop %1 765 <1> %assign %$STACK %$STACK-4 766 <1> %endrep 767 <2> %rotate -1 768 000000E1 5B <2> pop %1 769 <2> %assign %$STACK %$STACK-4 770 endproc 771 <1> %ifnctx proc 772 <1> %error expected 'proc' before 'endproc'. 773 <1> %else 774 <1> %if %$STACK > 0 775 <1> add esp, %$STACK 776 <1> %endif 777 <1> 778 <1> %if %$STACK <> (-%$STACKN) 779 <1> %error STACKLEVEL mismatch check 'local', 'alloc', 'pushd', 'popd' 780 <1> %endif 781 <1> 782 000000E2 C3 <1> ret 783 <1> %pop 784 <1> %endif 785 786 787 ;*************************************************************************** 788 789 %macro muladd3 2 790 pmov mm2, qword [ecx+4*(%1)] 791 pmov mm3, qword [ecx+4*(%1)+8] 792 pfmul mm2, qword [edx+4*(%2)] 793 pfmul mm3, qword [edx+4*(%2)+8] 794 pfadd mm0, mm2 795 pfadd mm1, mm3 796 %endmacro 797 798 segment_code 799 000000E3 90 align 32 800 00000100 90 times 6 nop 801 proc VectorMult_3DNow 802 <1> %push proc 803 <1> global _%1 804 <1> global %1 805 <1> _%1: 806 <1> %1: 807 <1> %assign %$STACK 0 808 <1> %assign %$STACKN 0 809 <1> %assign %$ARG 4 810 $buff1 arg 4 811 <1> $buff1 equ %$ARG 812 <1> %assign %$ARG %$ARG+%1 813 $V1 arg 4 814 <1> $V1 equ %$ARG 815 <1> %assign %$ARG %$ARG+%1 816 pushd ebx 817 <1> %rep %0 818 <1> push %1 819 <1> %assign %$STACK %$STACK+4 820 <1> %rotate 1 821 <1> %endrep 822 00000106 53 <2> push %1 823 <2> %assign %$STACK %$STACK+4 824 <2> %rotate 1 825 00000107 8B5C2408 mov ebx, [sp($buff1)] 826 0000010B 8B4C240C mov ecx, [sp($V1)] 827 0000010F BA[00000000] mov edx, Di_opt_SIMD 828 00000114 B808000000 mov eax, 8 829 00000119 0F6F25[E0080000] pmov mm4, qword [bias] 830 lbl1: 831 00000120 0F6F01 pmov mm0, qword [ecx] 832 00000123 0F6F4908 pmov mm1, qword [ecx+8] 833 00000127 0F0F02B4 pfmul mm0, qword [edx] 834 0000012B 0F0F4A08B4 pfmul mm1, qword [edx+8] 835 00000130 0F0FC49E pfadd mm0, mm4 836 00000134 0F0FCC9E pfadd mm1, mm4 837 838 muladd3 96, 4 839 00000138 0F6F9180010000 <1> pmov mm2, 
qword [ecx+4*(%1)] 840 0000013F 0F6F9988010000 <1> pmov mm3, qword [ecx+4*(%1)+8] 841 00000146 0F0F5210B4 <1> pfmul mm2, qword [edx+4*(%2)] 842 0000014B 0F0F5A18B4 <1> pfmul mm3, qword [edx+4*(%2)+8] 843 00000150 0F0FC29E <1> pfadd mm0, mm2 844 00000154 0F0FCB9E <1> pfadd mm1, mm3 845 muladd3 128, 8 846 00000158 0F6F9100020000 <1> pmov mm2, qword [ecx+4*(%1)] 847 0000015F 0F6F9908020000 <1> pmov mm3, qword [ecx+4*(%1)+8] 848 00000166 0F0F5220B4 <1> pfmul mm2, qword [edx+4*(%2)] 849 0000016B 0F0F5A28B4 <1> pfmul mm3, qword [edx+4*(%2)+8] 850 00000170 0F0FC29E <1> pfadd mm0, mm2 851 00000174 0F0FCB9E <1> pfadd mm1, mm3 852 muladd3 224, 12 853 00000178 0F6F9180030000 <1> pmov mm2, qword [ecx+4*(%1)] 854 0000017F 0F6F9988030000 <1> pmov mm3, qword [ecx+4*(%1)+8] 855 00000186 0F0F5230B4 <1> pfmul mm2, qword [edx+4*(%2)] 856 0000018B 0F0F5A38B4 <1> pfmul mm3, qword [edx+4*(%2)+8] 857 00000190 0F0FC29E <1> pfadd mm0, mm2 858 00000194 0F0FCB9E <1> pfadd mm1, mm3 859 muladd3 256, 16 860 00000198 0F6F9100040000 <1> pmov mm2, qword [ecx+4*(%1)] 861 0000019F 0F6F9908040000 <1> pmov mm3, qword [ecx+4*(%1)+8] 862 000001A6 0F0F5240B4 <1> pfmul mm2, qword [edx+4*(%2)] 863 000001AB 0F0F5A48B4 <1> pfmul mm3, qword [edx+4*(%2)+8] 864 000001B0 0F0FC29E <1> pfadd mm0, mm2 865 000001B4 0F0FCB9E <1> pfadd mm1, mm3 866 muladd3 352, 20 867 000001B8 0F6F9180050000 <1> pmov mm2, qword [ecx+4*(%1)] 868 000001BF 0F6F9988050000 <1> pmov mm3, qword [ecx+4*(%1)+8] 869 000001C6 0F0F5250B4 <1> pfmul mm2, qword [edx+4*(%2)] 870 000001CB 0F0F5A58B4 <1> pfmul mm3, qword [edx+4*(%2)+8] 871 000001D0 0F0FC29E <1> pfadd mm0, mm2 872 000001D4 0F0FCB9E <1> pfadd mm1, mm3 873 muladd3 384, 24 874 000001D8 0F6F9100060000 <1> pmov mm2, qword [ecx+4*(%1)] 875 000001DF 0F6F9908060000 <1> pmov mm3, qword [ecx+4*(%1)+8] 876 000001E6 0F0F5260B4 <1> pfmul mm2, qword [edx+4*(%2)] 877 000001EB 0F0F5A68B4 <1> pfmul mm3, qword [edx+4*(%2)+8] 878 000001F0 0F0FC29E <1> pfadd mm0, mm2 879 000001F4 0F0FCB9E <1> pfadd mm1, 
mm3 880 muladd3 480, 28 881 000001F8 0F6F9180070000 <1> pmov mm2, qword [ecx+4*(%1)] 882 000001FF 0F6F9988070000 <1> pmov mm3, qword [ecx+4*(%1)+8] 883 00000206 0F0F5270B4 <1> pfmul mm2, qword [edx+4*(%2)] 884 0000020B 0F0F5A78B4 <1> pfmul mm3, qword [edx+4*(%2)+8] 885 00000210 0F0FC29E <1> pfadd mm0, mm2 886 00000214 0F0FCB9E <1> pfadd mm1, mm3 887 00000218 83EA80 sub edx, byte -128 888 muladd3 512, 0 889 0000021B 0F6F9100080000 <1> pmov mm2, qword [ecx+4*(%1)] 890 00000222 0F6F9908080000 <1> pmov mm3, qword [ecx+4*(%1)+8] 891 00000229 0F0F12B4 <1> pfmul mm2, qword [edx+4*(%2)] 892 0000022D 0F0F5A08B4 <1> pfmul mm3, qword [edx+4*(%2)+8] 893 00000232 0F0FC29E <1> pfadd mm0, mm2 894 00000236 0F0FCB9E <1> pfadd mm1, mm3 895 muladd3 608, 4 896 0000023A 0F6F9180090000 <1> pmov mm2, qword [ecx+4*(%1)] 897 00000241 0F6F9988090000 <1> pmov mm3, qword [ecx+4*(%1)+8] 898 00000248 0F0F5210B4 <1> pfmul mm2, qword [edx+4*(%2)] 899 0000024D 0F0F5A18B4 <1> pfmul mm3, qword [edx+4*(%2)+8] 900 00000252 0F0FC29E <1> pfadd mm0, mm2 901 00000256 0F0FCB9E <1> pfadd mm1, mm3 902 muladd3 640, 8 903 0000025A 0F6F91000A0000 <1> pmov mm2, qword [ecx+4*(%1)] 904 00000261 0F6F99080A0000 <1> pmov mm3, qword [ecx+4*(%1)+8] 905 00000268 0F0F5220B4 <1> pfmul mm2, qword [edx+4*(%2)] 906 0000026D 0F0F5A28B4 <1> pfmul mm3, qword [edx+4*(%2)+8] 907 00000272 0F0FC29E <1> pfadd mm0, mm2 908 00000276 0F0FCB9E <1> pfadd mm1, mm3 909 muladd3 736, 12 910 0000027A 0F6F91800B0000 <1> pmov mm2, qword [ecx+4*(%1)] 911 00000281 0F6F99880B0000 <1> pmov mm3, qword [ecx+4*(%1)+8] 912 00000288 0F0F5230B4 <1> pfmul mm2, qword [edx+4*(%2)] 913 0000028D 0F0F5A38B4 <1> pfmul mm3, qword [edx+4*(%2)+8] 914 00000292 0F0FC29E <1> pfadd mm0, mm2 915 00000296 0F0FCB9E <1> pfadd mm1, mm3 916 muladd3 768, 16 917 0000029A 0F6F91000C0000 <1> pmov mm2, qword [ecx+4*(%1)] 918 000002A1 0F6F99080C0000 <1> pmov mm3, qword [ecx+4*(%1)+8] 919 000002A8 0F0F5240B4 <1> pfmul mm2, qword [edx+4*(%2)] 920 000002AD 0F0F5A48B4 <1> pfmul mm3, 
qword [edx+4*(%2)+8] 921 000002B2 0F0FC29E <1> pfadd mm0, mm2 922 000002B6 0F0FCB9E <1> pfadd mm1, mm3 923 muladd3 864, 20 924 000002BA 0F6F91800D0000 <1> pmov mm2, qword [ecx+4*(%1)] 925 000002C1 0F6F99880D0000 <1> pmov mm3, qword [ecx+4*(%1)+8] 926 000002C8 0F0F5250B4 <1> pfmul mm2, qword [edx+4*(%2)] 927 000002CD 0F0F5A58B4 <1> pfmul mm3, qword [edx+4*(%2)+8] 928 000002D2 0F0FC29E <1> pfadd mm0, mm2 929 000002D6 0F0FCB9E <1> pfadd mm1, mm3 930 muladd3 896, 24 931 000002DA 0F6F91000E0000 <1> pmov mm2, qword [ecx+4*(%1)] 932 000002E1 0F6F99080E0000 <1> pmov mm3, qword [ecx+4*(%1)+8] 933 000002E8 0F0F5260B4 <1> pfmul mm2, qword [edx+4*(%2)] 934 000002ED 0F0F5A68B4 <1> pfmul mm3, qword [edx+4*(%2)+8] 935 000002F2 0F0FC29E <1> pfadd mm0, mm2 936 000002F6 0F0FCB9E <1> pfadd mm1, mm3 937 muladd3 992, 28 938 000002FA 0F6F91800F0000 <1> pmov mm2, qword [ecx+4*(%1)] 939 00000301 0F6F99880F0000 <1> pmov mm3, qword [ecx+4*(%1)+8] 940 00000308 0F0F5270B4 <1> pfmul mm2, qword [edx+4*(%2)] 941 0000030D 0F0F5A78B4 <1> pfmul mm3, qword [edx+4*(%2)+8] 942 00000312 0F0FC29E <1> pfadd mm0, mm2 943 00000316 0F0FCB9E <1> pfadd mm1, mm3 944 0000031A 83EA80 sub edx, byte -128 945 ;add ecx, byte 16 946 0000031D 8D4910 lea ecx, [ecx+16] 947 948 00000320 0F0FC01D pf2id mm0, mm0 949 00000324 0F0FC91D pf2id mm1, mm1 950 00000328 0F7F03 pmov qword [ebx], mm0 951 0000032B 0F7F4B08 pmov qword [ebx+8], mm1 952 ;add ebx, byte 16 953 0000032F 8D5B10 lea ebx, [ebx+16] 954 00000332 48 dec eax 955 00000333 0F85E7FDFFFF jnz near lbl1 956 957 popd ebx 958 <1> %rep %0 959 <1> %rotate -1 960 <1> pop %1 961 <1> %assign %$STACK %$STACK-4 962 <1> %endrep 963 <2> %rotate -1 964 00000339 5B <2> pop %1 965 <2> %assign %$STACK %$STACK-4 966 endproc 967 <1> %ifnctx proc 968 <1> %error expected 'proc' before 'endproc'. 
969 <1> %else
970 <1> %if %$STACK > 0
971 <1> add esp, %$STACK
972 <1> %endif
973 <1>
974 <1> %if %$STACK <> (-%$STACKN)
975 <1> %error STACKLEVEL mismatch check 'local', 'alloc', 'pushd', 'popd'
976 <1> %endif
977 <1>
978 0000033A C3 <1> ret
979 <1> %pop
980 <1> %endif
981
982 ; muladdS src, coef : xmm2 += [ecx + 4*src] * [edx + 4*coef]  (four packed floats per use)
983 ;***************************************************************************************
984 ; Clobbers xmm0. Both memory operands use aligned forms (movaps load, mulps m128).
985 %macro muladdS 2
986 movaps xmm0, [ecx+4*(%1)]
987 mulps xmm0, [edx+4*(%2)]
988 addps xmm2, xmm0
989 %endmacro
990 ; VectorMult_SIMD: two dword stack arguments ($buff2 = destination pointer, $V2 = source pointer); plain ret, so the caller removes them. Runs 8 passes; each pass sums 16 four-float products of [ecx+..] with consecutive Di_opt_SIMD entries, adds [bias2], and stores 4 floats to the destination. ebx is saved/restored; eax, ecx, edx, xmm0, xmm2, xmm4 are overwritten.
991 0000033B 90 align 32
992 00000340 90 times 6 nop ; 6 bytes of padding so that lbl2 lands on offset 0x360 (32-byte boundary)
993 proc VectorMult_SIMD
994 <1> %push proc
995 <1> global _%1
996 <1> global %1
997 <1> _%1:
998 <1> %1:
999 <1> %assign %$STACK 0
1000 <1> %assign %$STACKN 0
1001 <1> %assign %$ARG 4
1002 $buff2 arg 4
1003 <1> $buff2 equ %$ARG
1004 <1> %assign %$ARG %$ARG+%1
1005 $V2 arg 4
1006 <1> $V2 equ %$ARG
1007 <1> %assign %$ARG %$ARG+%1
1008 pushd ebx
1009 <1> %rep %0
1010 <1> push %1
1011 <1> %assign %$STACK %$STACK+4
1012 <1> %rotate 1
1013 <1> %endrep
1014 00000346 53 <2> push %1
1015 <2> %assign %$STACK %$STACK+4
1016 <2> %rotate 1
1017 00000347 8B5C2408 mov ebx, [sp($buff2)] ; ebx = destination; uses the argument declared by this proc (previously $buff1, which is not declared here) - same [esp+8] encoding
1018 0000034B 8B4C240C mov ecx, [sp($V2)] ; ecx = source pointer
1019 0000034F BA[00000000] mov edx, Di_opt_SIMD ; edx = coefficient table
1020 00000354 B808000000 mov eax, 8 ; eax = pass counter
1021 00000359 0F2825[F0080000] movaps xmm4, [bias2] ; xmm4 = bias vector added to every result
1022 lbl2:
1023 00000360 0F2811 movaps xmm2, [ecx]
1024 00000363 0F5912 mulps xmm2, [edx]
1025 ; xmm2 now holds the first product; the muladdS uses below accumulate the remaining 15
1026 muladdS 96, 4
1027 00000366 0F288180010000 <1> movaps xmm0, [ecx+4*(%1)]
1028 0000036D 0F594210 <1> mulps xmm0, [edx+4*(%2)]
1029 00000371 0F58D0 <1> addps xmm2, xmm0
1030 muladdS 128, 8
1031 00000374 0F288100020000 <1> movaps xmm0, [ecx+4*(%1)]
1032 0000037B 0F594220 <1> mulps xmm0, [edx+4*(%2)]
1033 0000037F 0F58D0 <1> addps xmm2, xmm0
1034 muladdS 224, 12
1035 00000382 0F288180030000 <1> movaps xmm0, [ecx+4*(%1)]
1036 00000389 0F594230 <1> mulps xmm0, [edx+4*(%2)]
1037 0000038D 0F58D0 <1> addps xmm2, xmm0
1038 muladdS 256, 16
1039 00000390 0F288100040000 <1> movaps xmm0, [ecx+4*(%1)]
1040 00000397 0F594240 <1> mulps xmm0, [edx+4*(%2)]
1041 0000039B 0F58D0 <1> addps xmm2, xmm0
1042 muladdS 352, 20
1043 0000039E 0F288180050000 <1> movaps xmm0, [ecx+4*(%1)]
1044 000003A5 0F594250 <1> mulps xmm0, [edx+4*(%2)]
1045 000003A9 0F58D0 <1> addps xmm2, xmm0
1046 muladdS 384, 24
1047 000003AC 0F288100060000 <1> movaps xmm0, [ecx+4*(%1)]
1048 000003B3 0F594260 <1> mulps xmm0, [edx+4*(%2)]
1049 000003B7 0F58D0 <1> addps xmm2, xmm0
1050 muladdS 480, 28
1051 000003BA 0F288180070000 <1> movaps xmm0, [ecx+4*(%1)]
1052 000003C1 0F594270 <1> mulps xmm0, [edx+4*(%2)]
1053 000003C5 0F58D0 <1> addps xmm2, xmm0
1054 000003C8 83EA80 sub edx, byte -128 ; edx += 128: -128 fits a signed imm8, +128 would not
1055 muladdS 512, 0
1056 000003CB 0F288100080000 <1> movaps xmm0, [ecx+4*(%1)]
1057 000003D2 0F5902 <1> mulps xmm0, [edx+4*(%2)]
1058 000003D5 0F58D0 <1> addps xmm2, xmm0
1059 muladdS 608, 4
1060 000003D8 0F288180090000 <1> movaps xmm0, [ecx+4*(%1)]
1061 000003DF 0F594210 <1> mulps xmm0, [edx+4*(%2)]
1062 000003E3 0F58D0 <1> addps xmm2, xmm0
1063 muladdS 640, 8
1064 000003E6 0F2881000A0000 <1> movaps xmm0, [ecx+4*(%1)]
1065 000003ED 0F594220 <1> mulps xmm0, [edx+4*(%2)]
1066 000003F1 0F58D0 <1> addps xmm2, xmm0
1067 muladdS 736, 12
1068 000003F4 0F2881800B0000 <1> movaps xmm0, [ecx+4*(%1)]
1069 000003FB 0F594230 <1> mulps xmm0, [edx+4*(%2)]
1070 000003FF 0F58D0 <1> addps xmm2, xmm0
1071 muladdS 768, 16
1072 00000402 0F2881000C0000 <1> movaps xmm0, [ecx+4*(%1)]
1073 00000409 0F594240 <1> mulps xmm0, [edx+4*(%2)]
1074 0000040D 0F58D0 <1> addps xmm2, xmm0
1075 muladdS 864, 20
1076 00000410 0F2881800D0000 <1> movaps xmm0, [ecx+4*(%1)]
1077 00000417 0F594250 <1> mulps xmm0, [edx+4*(%2)]
1078 0000041B 0F58D0 <1> addps xmm2, xmm0
1079 muladdS 896, 24
1080 0000041E 0F2881000E0000 <1> movaps xmm0, [ecx+4*(%1)]
1081 00000425 0F594260 <1> mulps xmm0, [edx+4*(%2)]
1082 00000429 0F58D0 <1> addps xmm2, xmm0
1083 muladdS 992, 28
1084 0000042C 0F2881800F0000 <1> movaps xmm0, [ecx+4*(%1)]
1085 00000433 0F594270 <1> mulps xmm0, [edx+4*(%2)]
1086 00000437 0F58D0 <1> addps xmm2, xmm0
1087 0000043A 83EA80 sub edx, byte -128 ; edx += 128 again: 256 bytes of coefficients consumed per pass
1088 0000043D 83C110 add ecx, byte 16 ; next four source floats
1089 00000440 0F58D4 addps xmm2, xmm4 ; add the bias loaded from [bias2]
1090
1091 00000443 0F1113 movups [ebx], xmm2 ; unaligned store: destination alignment is not assumed
1092 00000446 83C310 add ebx, byte 16
1093 00000449 48 dec eax
1094 0000044A 0F8510FFFFFF jnz near lbl2
1095
1096 popd ebx
1097 <1> %rep %0
1098 <1> %rotate -1
1099 <1> pop %1
1100 <1> %assign %$STACK %$STACK-4
1101 <1> %endrep
1102 <2> %rotate -1
1103 00000450 5B <2> pop %1
1104 <2> %assign %$STACK %$STACK-4
1105 endproc
1106 <1> %ifnctx proc
1107 <1> %error expected 'proc' before 'endproc'.
1108 <1> %else
1109 <1> %if %$STACK > 0
1110 <1> add esp, %$STACK
1111 <1> %endif
1112 <1>
1113 <1> %if %$STACK <> (-%$STACKN)
1114 <1> %error STACKLEVEL mismatch check 'local', 'alloc', 'pushd', 'popd'
1115 <1> %endif
1116 <1>
1117 00000451 C3 <1> ret
1118 <1> %pop
1119 <1> %endif
1120
1121 ;***************************************************************************************
1122 ; Addressing shorthands: A(x) = [ebp + 4x], B(x) = [ebp + 64 + 4x], S(x) = [ecx + 4x], V(x) = [edx + 4x - 128]. Plain name = qword access, leading underscore = dword access.
1123
1124 %define A(x) qword [ ebp + 4*(x) ]
1125 %define _A(x) dword [ ebp + 4*(x) ]
1126 %define B(x) qword [ ebp + 64 + 4*(x) ]
1127 %define _B(x) dword [ ebp + 64 + 4*(x) ]
1128 %define S(x) qword [ ecx + 4*(x) ]
1129 %define _S(x) dword [ ecx + 4*(x) ]
1130 %define V(x) qword [ edx + 4*(x) - 128]
1131 %define _V(x) dword [ edx + 4*(x) - 128]
1132 ; turn dst, tmp : swaps the two dwords of dst; tmp is overwritten
1133 %macro turn 2 ; dst, tmp
1134 punpckldq %2, %1 ; tmp = src.l | tmp.l
1135 punpckhdq %1, %2 ; src = src.l | src.h
1136 %endmacro
1137 ; copy2 d, s : V[d] = V[s], V[d-1] = V[s+1]  (dword moves through eax / ebx)
1138 %macro copy2 2
1139 mov eax, _V(%2)
1140 mov ebx, _V(%2+1)
1141 mov _V(%1), eax
1142 mov _V(%1-1), ebx
1143 %endmacro
1144 ; copy1 d, s : V[d] = V[s]  (dword move through eax)
1145 %macro copy1 2
1146 mov eax, _V(%2)
1147 mov _V(%1), eax
1148 %endmacro
1149 ; invcopy2 d, s : like copy2, but ecx is added to each dword before it is stored. NOTE(review): presumably ecx holds a sign-bit mask at the call sites so the add flips the float sign - confirm.
1150 %macro invcopy2 2
1151 mov eax, _V(%2)
1152 mov ebx, _V(%2+1)
1153 add eax, ecx
1154 add ebx, ecx
1155 ;lea eax, [eax+ecx]
1156 ;lea ebx, [ebx+ecx]
1157 mov _V(%1), eax
1158 mov _V(%1-1), ebx
1159 %endmacro
1160
1161
1162
;************************************************************************************** 1163 1164 %macro tu_was31 0 1165 ; tu_was31: reads A(0..15), writes B(0..15). B[i] = A[i] + A[i+8] for i = 0..7; B[8+i] = (A[i] - A[i+8]) scaled by the C02/C14/C30/C18 pairs (defined outside this chunk). Scratch: mm0, mm1. 1166 ; B00 = A00 + A08; 1167 ; B01 = A01 + A09; 1168 ; B02 = A02 + A10; 1169 ; B03 = A03 + A11; 1170 1171 pmov mm0, A(0) 1172 pmov mm1, A(2) 1173 pfadd mm0, A(8) 1174 pfadd mm1, A(10) 1175 pmov B(0), mm0 1176 pmov B(2), mm1 1177 1178 ; B04 = A04 + A12; 1179 ; B05 = A05 + A13; 1180 ; B06 = A06 + A14; 1181 ; B07 = A07 + A15; 1182 1183 pmov mm0, A(4) 1184 pmov mm1, A(6) 1185 pfadd mm0, A(12) 1186 pfadd mm1, A(14) 1187 pmov B(4), mm0 1188 pmov B(6), mm1 1189 1190 ; B08 = (A00 - A08) * C[ 2]; 1191 ; B09 = (A01 - A09) * C[ 6]; 1192 ; B10 = (A02 - A10) * C[14]; 1193 ; B11 = (A03 - A11) * C[10]; 1194 1195 pmov mm0, A(0) 1196 pmov mm1, A(2) 1197 pfsub mm0, A(8) 1198 pfsub mm1, A(10) 1199 pfmul mm0, C02 1200 pfmul mm1, C14 1201 pmov B(8), mm0 1202 pmov B(10), mm1 1203 1204 ; B12 = (A04 - A12) * C[30]; 1205 ; B13 = (A05 - A13) * C[26]; 1206 ; B14 = (A06 - A14) * C[18]; 1207 ; B15 = (A07 - A15) * C[22]; 1208 1209 pmov mm0, A(4) 1210 pmov mm1, A(6) 1211 pfsub mm0, A(12) 1212 pfsub mm1, A(14) 1213 pfmul mm0, C30 1214 pfmul mm1, C18 1215 pmov B(12), mm0 1216 pmov B(14), mm1 1217 %endmacro 1218 1219 %macro tu_was32 0 1220 ; tu_was32: reads B(0..15), writes A(0..15). Within each half of 8, sums elements 4 apart and scales their differences by the C04/C28 pairs. Scratch: mm0, mm1. 1221 ; A00 = B00 + B04; 1222 ; A01 = B01 + B05; 1223 ; A02 = B02 + B06; 1224 ; A03 = B03 + B07; 1225 1226 pmov mm0, B(0) 1227 pmov mm1, B(2) 1228 pfadd mm0, B(4) 1229 pfadd mm1, B(6) 1230 pmov A(0), mm0 1231 pmov A(2), mm1 1232 1233 ; A04 = (B00 - B04) * C[ 4]; 1234 ; A05 = (B01 - B05) * C[12]; 1235 ; A06 = (B02 - B06) * C[28]; 1236 ; A07 = (B03 - B07) * C[20]; 1237 1238 pmov mm0, B(0) 1239 pmov mm1, B(2) 1240 pfsub mm0, B(4) 1241 pfsub mm1, B(6) 1242 pfmul mm0, C04 1243 pfmul mm1, C28 1244 pmov A(4), mm0 1245 pmov A(6), mm1 1246 1247 ; A08 = B08 + B12; 1248 ; A09 = B09 + B13; 1249 ; A10 = B10 + B14; 1250 ; A11 = B11 + B15; 1251 1252 pmov mm0, B(8) 1253 pmov mm1, B(10) 1254 pfadd mm0, B(12) 1255 pfadd mm1, B(14) 1256 pmov A(8), mm0 1257 pmov 
A(10), mm1 1258 1259 ; A12 = (B08 - B12) * C[ 4]; 1260 ; A13 = (B09 - B13) * C[12]; 1261 ; A14 = (B10 - B14) * C[28]; 1262 ; A15 = (B11 - B15) * C[20]; 1263 1264 pmov mm0, B(8) 1265 pmov mm1, B(10) 1266 pfsub mm0, B(12) 1267 pfsub mm1, B(14) 1268 pfmul mm0, C04 1269 pfmul mm1, C28 1270 pmov A(12), mm0 1271 pmov A(14), mm1 1272 1273 %endmacro 1274 1275 %macro tu_was33 0 1276 ; tu_was33: reads A(0..15), writes B(0..15). Within each group of 4, sums elements 2 apart and scales their differences by the qword C08, loaded once into mm6. Scratch: mm0, mm1, mm2, mm6. 1277 ; B00 = A00 + A02; 1278 ; B01 = A01 + A03; 1279 ; B02 = (A00 - A02) * C[ 8]; 1280 ; B03 = (A01 - A03) * C[24]; 1281 1282 pmov mm6, C08 1283 1284 pmov mm0, A(0) 1285 pmov mm1, A(2) 1286 pmov mm2, mm0 1287 pfsub mm0, mm1 1288 pfadd mm2, mm1 1289 pfmul mm0, mm6 1290 pmov B(0), mm2 1291 pmov B(2), mm0 1292 1293 ; B04 = A04 + A06; 1294 ; B05 = A05 + A07; 1295 ; B06 = (A04 - A06) * C[ 8]; 1296 ; B07 = (A05 - A07) * C[24]; 1297 1298 pmov mm0, A(4) 1299 pmov mm1, A(6) 1300 pmov mm2, mm0 1301 pfsub mm0, mm1 1302 pfadd mm2, mm1 1303 pfmul mm0, mm6 1304 pmov B(4), mm2 1305 pmov B(6), mm0 1306 1307 ; B08 = A08 + A10; 1308 ; B09 = A09 + A11; 1309 ; B10 = (A08 - A10) * C[ 8]; 1310 ; B11 = (A09 - A11) * C[24]; 1311 1312 pmov mm0, A(8) 1313 pmov mm1, A(10) 1314 pmov mm2, mm0 1315 pfsub mm0, mm1 1316 pfadd mm2, mm1 1317 pfmul mm0, mm6 1318 pmov B(8), mm2 1319 pmov B(10), mm0 1320 1321 ; B12 = A12 + A14; 1322 ; B13 = A13 + A15; 1323 ; B14 = (A12 - A14) * C[ 8]; 1324 ; B15 = (A13 - A15) * C[24]; 1325 1326 pmov mm0, A(12) 1327 pmov mm1, A(14) 1328 pmov mm2, mm0 1329 pfsub mm0, mm1 1330 pfadd mm2, mm1 1331 pfmul mm0, mm6 1332 pmov B(12), mm2 1333 pmov B(14), mm0 1334 1335 %endmacro 1336 1337 %macro tu_was34 0 1338 ; tu_was34: reads B(0..15), writes A(0..15), combining adjacent pairs. pfacc yields (lo+hi of the raw pair, lo+hi of the pair multiplied by C16). NOTE(review): the difference in the formulas below presumably comes from a negated second element in C16 - confirm against its definition. Scratch: mm0-mm3, mm6. 1339 ; A00 = B00 + B01; 1340 ; A01 = (B00 - B01) * C[16]; 1341 ; A02 = B02 + B03; 1342 ; A03 = (B02 - B03) * C[16]; 1343 1344 pmov mm6, C16 1345 1346 pmov mm0, B(0) 1347 pmov mm1, B(2) 1348 pmov mm2, mm0 1349 pmov mm3, mm1 1350 pfmul mm0, mm6 1351 pfmul mm1, mm6 1352 pfacc mm2, mm0 1353 pfacc mm3, mm1 1354 pmov A(0), mm2 1355 pmov A(2), mm3 1356 1357 ; A04 = B04 + B05; 1358 ; A05 = (B04 - B05) * 
C[16]; 1359 ; A06 = B06 + B07; 1360 ; A07 = (B06 - B07) * C[16]; 1361 1362 pmov mm0, B(4) 1363 pmov mm1, B(6) 1364 pmov mm2, mm0 1365 pmov mm3, mm1 1366 pfmul mm0, mm6 1367 pfmul mm1, mm6 1368 pfacc mm2, mm0 1369 pfacc mm3, mm1 1370 pmov A(4), mm2 1371 pmov A(6), mm3 1372 1373 ; A08 = B08 + B09; 1374 ; A09 = (B08 - B09) * C[16]; 1375 ; A10 = B10 + B11; 1376 ; A11 = (B10 - B11) * C[16]; 1377 1378 pmov mm0, B(8) 1379 pmov mm1, B(10) 1380 pmov mm2, mm0 1381 pmov mm3, mm1 1382 pfmul mm0, mm6 1383 pfmul mm1, mm6 1384 pfacc mm2, mm0 1385 pfacc mm3, mm1 1386 pmov A(8), mm2 1387 pmov A(10), mm3 1388 1389 ; A12 = B12 + B13; 1390 ; A13 = (B12 - B13) * C[16]; 1391 ; A14 = B14 + B15; 1392 ; A15 = (B14 - B15) * C[16]; 1393 1394 pmov mm0, B(12) 1395 pmov mm1, B(14) 1396 pmov mm2, mm0 1397 pmov mm3, mm1 1398 pfmul mm0, mm6 1399 pfmul mm1, mm6 1400 pfacc mm2, mm0 1401 pfacc mm3, mm1 1402 pmov A(12), mm2 1403 pmov A(14), mm3 1404 1405 %endmacro 1406 1407 ;*************************************************************************** 1408 1409 00000452 90 align 32 1410 proc Calculate_New_V_3DNow 1411 <1> %push proc 1412 <1> global _%1 1413 <1> global %1 1414 <1> _%1: 1415 <1> %1: 1416 <1> %assign %$STACK 0 1417 <1> %assign %$STACKN 0 1418 <1> %assign %$ARG 4 1419 $S4 arg 4 1420 <1> $S4 equ %$ARG 1421 <1> %assign %$ARG %$ARG+%1 1422 $V4 arg 4 1423 <1> $V4 equ %$ARG 1424 <1> %assign %$ARG %$ARG+%1 1425 00000460 8B4C2404 mov ecx, [sp($S4)] 1426 00000464 8B542408 mov edx, [sp($V4)] 1427 00000468 83EA80 sub edx, byte -128 1428 0000046B 53 push ebx 1429 0000046C 55 push ebp 1430 0000046D B8[60080000] mov eax, C 1431 00000472 89E3 mov ebx, esp 1432 00000474 83C480 add esp, byte -128 1433 00000477 83E4C0 and esp, byte 0xFFFFFFC0 1434 0000047A 89E5 mov ebp, esp 1435 1436 ; A00 = S[ 0] + S[31]; 1437 ; A01 = S[ 1] + S[30]; 1438 ; A02 = S[ 3] + S[28]; 1439 ; A03 = S[ 2] + S[29]; 1440 1441 0000047C 0F6F4178 pmov mm0, S(30) 1442 00000480 0F6F4908 pmov mm1, S(2) 1443 turn mm0, mm6 1444 00000484 
0F62F0 <1> punpckldq %2, %1 1445 00000487 0F6AC6 <1> punpckhdq %1, %2 1446 turn mm1, mm7 1447 0000048A 0F62F9 <1> punpckldq %2, %1 1448 0000048D 0F6ACF <1> punpckhdq %1, %2 1449 00000490 0F0F019E pfadd mm0, S(0) 1450 00000494 0F0F49709E pfadd mm1, S(28) 1451 00000499 0F7F4500 pmov A(0), mm0 1452 0000049D 0F7F4D08 pmov A(2), mm1 1453 1454 ; A04 = S[ 7] + S[24]; 1455 ; A05 = S[ 6] + S[25]; 1456 ; A06 = S[ 4] + S[27]; 1457 ; A07 = S[ 5] + S[26]; 1458 1459 000004A1 0F6F4118 pmov mm0, S(6) 1460 000004A5 0F6F4968 pmov mm1, S(26) 1461 turn mm0, mm6 1462 000004A9 0F62F0 <1> punpckldq %2, %1 1463 000004AC 0F6AC6 <1> punpckhdq %1, %2 1464 turn mm1, mm7 1465 000004AF 0F62F9 <1> punpckldq %2, %1 1466 000004B2 0F6ACF <1> punpckhdq %1, %2 1467 000004B5 0F0F41609E pfadd mm0, S(24) 1468 000004BA 0F0F49109E pfadd mm1, S(4) 1469 000004BF 0F7F4510 pmov A(4), mm0 1470 000004C3 0F7F4D18 pmov A(6), mm1 1471 1472 ; A08 = S[15] + S[16]; 1473 ; A09 = S[14] + S[17]; 1474 ; A10 = S[12] + S[19]; 1475 ; A11 = S[13] + S[18]; 1476 1477 000004C7 0F6F4138 pmov mm0, S(14) 1478 000004CB 0F6F4948 pmov mm1, S(18) 1479 turn mm0, mm6 1480 000004CF 0F62F0 <1> punpckldq %2, %1 1481 000004D2 0F6AC6 <1> punpckhdq %1, %2 1482 turn mm1, mm7 1483 000004D5 0F62F9 <1> punpckldq %2, %1 1484 000004D8 0F6ACF <1> punpckhdq %1, %2 1485 000004DB 0F0F41409E pfadd mm0, S(16) 1486 000004E0 0F0F49309E pfadd mm1, S(12) 1487 000004E5 0F7F4520 pmov A(8), mm0 1488 000004E9 0F7F4D28 pmov A(10), mm1 1489 1490 ; A12 = S[ 8] + S[23]; 1491 ; A13 = S[ 9] + S[22]; 1492 ; A14 = S[11] + S[20]; 1493 ; A15 = S[10] + S[21]; 1494 1495 000004ED 0F6F4158 pmov mm0, S(22) 1496 000004F1 0F6F4928 pmov mm1, S(10) 1497 turn mm0, mm6 1498 000004F5 0F62F0 <1> punpckldq %2, %1 1499 000004F8 0F6AC6 <1> punpckhdq %1, %2 1500 turn mm1, mm7 1501 000004FB 0F62F9 <1> punpckldq %2, %1 1502 000004FE 0F6ACF <1> punpckhdq %1, %2 1503 00000501 0F0F41209E pfadd mm0, S(8) 1504 00000506 0F0F49509E pfadd mm1, S(20) 1505 0000050B 0F7F4530 pmov A(12), mm0 1506 
0000050F 0F7F4D38 pmov A(14), mm1 1507 1508 tu_was31 1509 <1> 1510 <1> 1511 <1> 1512 <1> 1513 <1> 1514 <1> 1515 00000513 0F6F4500 <1> pmov mm0, A(0) 1516 00000517 0F6F4D08 <1> pmov mm1, A(2) 1517 0000051B 0F0F45209E <1> pfadd mm0, A(8) 1518 00000520 0F0F4D289E <1> pfadd mm1, A(10) 1519 00000525 0F7F4540 <1> pmov B(0), mm0 1520 00000529 0F7F4D48 <1> pmov B(2), mm1 1521 <1> 1522 <1> 1523 <1> 1524 <1> 1525 <1> 1526 <1> 1527 0000052D 0F6F4510 <1> pmov mm0, A(4) 1528 00000531 0F6F4D18 <1> pmov mm1, A(6) 1529 00000535 0F0F45309E <1> pfadd mm0, A(12) 1530 0000053A 0F0F4D389E <1> pfadd mm1, A(14) 1531 0000053F 0F7F4550 <1> pmov B(4), mm0 1532 00000543 0F7F4D58 <1> pmov B(6), mm1 1533 <1> 1534 <1> 1535 <1> 1536 <1> 1537 <1> 1538 <1> 1539 00000547 0F6F4500 <1> pmov mm0, A(0) 1540 0000054B 0F6F4D08 <1> pmov mm1, A(2) 1541 0000054F 0F0F45209A <1> pfsub mm0, A(8) 1542 00000554 0F0F4D289A <1> pfsub mm1, A(10) 1543 00000559 0F0F4020B4 <1> pfmul mm0, C02 1544 0000055E 0F0F4828B4 <1> pfmul mm1, C14 1545 00000563 0F7F4560 <1> pmov B(8), mm0 1546 00000567 0F7F4D68 <1> pmov B(10), mm1 1547 <1> 1548 <1> 1549 <1> 1550 <1> 1551 <1> 1552 <1> 1553 0000056B 0F6F4510 <1> pmov mm0, A(4) 1554 0000056F 0F6F4D18 <1> pmov mm1, A(6) 1555 00000573 0F0F45309A <1> pfsub mm0, A(12) 1556 00000578 0F0F4D389A <1> pfsub mm1, A(14) 1557 0000057D 0F0F4030B4 <1> pfmul mm0, C30 1558 00000582 0F0F4838B4 <1> pfmul mm1, C18 1559 00000587 0F7F4570 <1> pmov B(12), mm0 1560 0000058B 0F7F4D78 <1> pmov B(14), mm1 1561 tu_was32 1562 <1> 1563 <1> 1564 <1> 1565 <1> 1566 <1> 1567 <1> 1568 0000058F 0F6F4540 <1> pmov mm0, B(0) 1569 00000593 0F6F4D48 <1> pmov mm1, B(2) 1570 00000597 0F0F45509E <1> pfadd mm0, B(4) 1571 0000059C 0F0F4D589E <1> pfadd mm1, B(6) 1572 000005A1 0F7F4500 <1> pmov A(0), mm0 1573 000005A5 0F7F4D08 <1> pmov A(2), mm1 1574 <1> 1575 <1> 1576 <1> 1577 <1> 1578 <1> 1579 <1> 1580 000005A9 0F6F4540 <1> pmov mm0, B(0) 1581 000005AD 0F6F4D48 <1> pmov mm1, B(2) 1582 000005B1 0F0F45509A <1> pfsub mm0, B(4) 1583 
000005B6 0F0F4D589A <1> pfsub mm1, B(6) 1584 000005BB 0F0F4010B4 <1> pfmul mm0, C04 1585 000005C0 0F0F4818B4 <1> pfmul mm1, C28 1586 000005C5 0F7F4510 <1> pmov A(4), mm0 1587 000005C9 0F7F4D18 <1> pmov A(6), mm1 1588 <1> 1589 <1> 1590 <1> 1591 <1> 1592 <1> 1593 <1> 1594 000005CD 0F6F4560 <1> pmov mm0, B(8) 1595 000005D1 0F6F4D68 <1> pmov mm1, B(10) 1596 000005D5 0F0F45709E <1> pfadd mm0, B(12) 1597 000005DA 0F0F4D789E <1> pfadd mm1, B(14) 1598 000005DF 0F7F4520 <1> pmov A(8), mm0 1599 000005E3 0F7F4D28 <1> pmov A(10), mm1 1600 <1> 1601 <1> 1602 <1> 1603 <1> 1604 <1> 1605 <1> 1606 000005E7 0F6F4560 <1> pmov mm0, B(8) 1607 000005EB 0F6F4D68 <1> pmov mm1, B(10) 1608 000005EF 0F0F45709A <1> pfsub mm0, B(12) 1609 000005F4 0F0F4D789A <1> pfsub mm1, B(14) 1610 000005F9 0F0F4010B4 <1> pfmul mm0, C04 1611 000005FE 0F0F4818B4 <1> pfmul mm1, C28 1612 00000603 0F7F4530 <1> pmov A(12), mm0 1613 00000607 0F7F4D38 <1> pmov A(14), mm1 1614 <1> 1615 tu_was33 1616 <1> 1617 <1> 1618 <1> 1619 <1> 1620 <1> 1621 <1> 1622 0000060B 0F6F7008 <1> pmov mm6, C08 1623 <1> 1624 0000060F 0F6F4500 <1> pmov mm0, A(0) 1625 00000613 0F6F4D08 <1> pmov mm1, A(2) 1626 00000617 0F6FD0 <1> pmov mm2, mm0 1627 0000061A 0F0FC19A <1> pfsub mm0, mm1 1628 0000061E 0F0FD19E <1> pfadd mm2, mm1 1629 00000622 0F0FC6B4 <1> pfmul mm0, mm6 1630 00000626 0F7F5540 <1> pmov B(0), mm2 1631 0000062A 0F7F4548 <1> pmov B(2), mm0 1632 <1> 1633 <1> 1634 <1> 1635 <1> 1636 <1> 1637 <1> 1638 0000062E 0F6F4510 <1> pmov mm0, A(4) 1639 00000632 0F6F4D18 <1> pmov mm1, A(6) 1640 00000636 0F6FD0 <1> pmov mm2, mm0 1641 00000639 0F0FC19A <1> pfsub mm0, mm1 1642 0000063D 0F0FD19E <1> pfadd mm2, mm1 1643 00000641 0F0FC6B4 <1> pfmul mm0, mm6 1644 00000645 0F7F5550 <1> pmov B(4), mm2 1645 00000649 0F7F4558 <1> pmov B(6), mm0 1646 <1> 1647 <1> 1648 <1> 1649 <1> 1650 <1> 1651 <1> 1652 0000064D 0F6F4520 <1> pmov mm0, A(8) 1653 00000651 0F6F4D28 <1> pmov mm1, A(10) 1654 00000655 0F6FD0 <1> pmov mm2, mm0 1655 00000658 0F0FC19A <1> pfsub mm0, mm1 
1656 0000065C 0F0FD19E <1> pfadd mm2, mm1 1657 00000660 0F0FC6B4 <1> pfmul mm0, mm6 1658 00000664 0F7F5560 <1> pmov B(8), mm2 1659 00000668 0F7F4568 <1> pmov B(10), mm0 1660 <1> 1661 <1> 1662 <1> 1663 <1> 1664 <1> 1665 <1> 1666 0000066C 0F6F4530 <1> pmov mm0, A(12) 1667 00000670 0F6F4D38 <1> pmov mm1, A(14) 1668 00000674 0F6FD0 <1> pmov mm2, mm0 1669 00000677 0F0FC19A <1> pfsub mm0, mm1 1670 0000067B 0F0FD19E <1> pfadd mm2, mm1 1671 0000067F 0F0FC6B4 <1> pfmul mm0, mm6 1672 00000683 0F7F5570 <1> pmov B(12), mm2 1673 00000687 0F7F4578 <1> pmov B(14), mm0 1674 <1> 1675 tu_was34 1676 <1> 1677 <1> 1678 <1> 1679 <1> 1680 <1> 1681 <1> 1682 0000068B 0F6F30 <1> pmov mm6, C16 1683 <1> 1684 0000068E 0F6F4540 <1> pmov mm0, B(0) 1685 00000692 0F6F4D48 <1> pmov mm1, B(2) 1686 00000696 0F6FD0 <1> pmov mm2, mm0 1687 00000699 0F6FD9 <1> pmov mm3, mm1 1688 0000069C 0F0FC6B4 <1> pfmul mm0, mm6 1689 000006A0 0F0FCEB4 <1> pfmul mm1, mm6 1690 000006A4 0F0FD0AE <1> pfacc mm2, mm0 1691 000006A8 0F0FD9AE <1> pfacc mm3, mm1 1692 000006AC 0F7F5500 <1> pmov A(0), mm2 1693 000006B0 0F7F5D08 <1> pmov A(2), mm3 1694 <1> 1695 <1> 1696 <1> 1697 <1> 1698 <1> 1699 <1> 1700 000006B4 0F6F4550 <1> pmov mm0, B(4) 1701 000006B8 0F6F4D58 <1> pmov mm1, B(6) 1702 000006BC 0F6FD0 <1> pmov mm2, mm0 1703 000006BF 0F6FD9 <1> pmov mm3, mm1 1704 000006C2 0F0FC6B4 <1> pfmul mm0, mm6 1705 000006C6 0F0FCEB4 <1> pfmul mm1, mm6 1706 000006CA 0F0FD0AE <1> pfacc mm2, mm0 1707 000006CE 0F0FD9AE <1> pfacc mm3, mm1 1708 000006D2 0F7F5510 <1> pmov A(4), mm2 1709 000006D6 0F7F5D18 <1> pmov A(6), mm3 1710 <1> 1711 <1> 1712 <1> 1713 <1> 1714 <1> 1715 <1> 1716 000006DA 0F6F4560 <1> pmov mm0, B(8) 1717 000006DE 0F6F4D68 <1> pmov mm1, B(10) 1718 000006E2 0F6FD0 <1> pmov mm2, mm0 1719 000006E5 0F6FD9 <1> pmov mm3, mm1 1720 000006E8 0F0FC6B4 <1> pfmul mm0, mm6 1721 000006EC 0F0FCEB4 <1> pfmul mm1, mm6 1722 000006F0 0F0FD0AE <1> pfacc mm2, mm0 1723 000006F4 0F0FD9AE <1> pfacc mm3, mm1 1724 000006F8 0F7F5520 <1> pmov A(8), mm2 1725 
000006FC 0F7F5D28 <1> pmov A(10), mm3 1726 <1> 1727 <1> 1728 <1> 1729 <1> 1730 <1> 1731 <1> 1732 00000700 0F6F4570 <1> pmov mm0, B(12) 1733 00000704 0F6F4D78 <1> pmov mm1, B(14) 1734 00000708 0F6FD0 <1> pmov mm2, mm0 1735 0000070B 0F6FD9 <1> pmov mm3, mm1 1736 0000070E 0F0FC6B4 <1> pfmul mm0, mm6 1737 00000712 0F0FCEB4 <1> pfmul mm1, mm6 1738 00000716 0F0FD0AE <1> pfacc mm2, mm0 1739 0000071A 0F0FD9AE <1> pfacc mm3, mm1 1740 0000071E 0F7F5530 <1> pmov A(12), mm2 1741 00000722 0F7F5D38 <1> pmov A(14), mm3 1742 <1> 1743 1744 00000726 0F6F3D[00090000] pmov mm7, qword [negativ] 1745 1746 ; V[48] = -A00; 1747 ; V[ 0] = A01; 1748 ; V[40] = -A02 - (V[ 8] = A03); 1749 ; 0 1 2 3 4 5 6 7 1750 0000072D 0F6E550C movd mm2, _A(3) ; 3 - 1751 00000731 0F6E4500 movd mm0, _A(0) ; 0 3 - 1752 00000735 0F7E52A0 movd _V(8), mm2 1753 00000739 0F6E4D04 movd mm1, _A(1) ; 0 1 3 - 1754 0000073D 0F0F55089E pfadd mm2, A(2) ; 0 1 2+3 - 1755 00000742 0FEFC7 pxor mm0, mm7 ; -0 1 2+3 - 1756 00000745 0FEFD7 pxor mm2, mm7 ; -0 1 -2-3 - 1757 00000748 0F7E4A80 movd _V(0), mm1 1758 0000074C 0F7E4240 movd _V(48), mm0 1759 00000750 0F7E5220 movd _V(40), mm2 1760 1761 ; V[36] = -((V[ 4] = A05 + (V[12] = A07)) + A06); 1762 ; V[44] = - A04 - A06 - A07; 1763 1764 00000754 0F6E451C movd mm0, _A(7) ; 7 - 1765 00000758 0F6F4D18 pmov mm1, A(6) ; 7 6 - 1766 0000075C 0F7E42B0 movd _V(12), mm0 1767 00000760 0F0F45149E pfadd mm0, A(5) ; 5+7 6 - 1768 00000765 0F7E4290 movd _V(4), mm0 1769 00000769 0F0FC19E pfadd mm0, mm1 ; 5+6+7 6 - 1770 0000076D 0F0FC9AE pfacc mm1, mm1 ; 5+6+7 6+7 - 1771 00000771 0F0F4D109E pfadd mm1, A(4) ; 5+6+7 4+6+7 - 1772 00000776 0FEFC7 pxor mm0, mm7 ;-5-6-7 4+6+7 - 1773 00000779 0FEFCF pxor mm1, mm7 ;-5-6-7 -4-6-7 - 1774 0000077C 0F7E4210 movd _V(36), mm0 1775 00000780 0F7E4A30 movd _V(44), mm1 1776 1777 ; V[ 6] = (V[10] = A11 + (V[14] = A15)) + A13; 1778 ; V[38] = (V[34] = -(V[ 2] = A09 + A13 + A15) - A14) + A09 - A10 - A11; 1779 1780 00000784 0F6E553C movd mm2, _A(15) ; 15 1781 00000788 
0F6E4524 movd mm0, _A(9) ; 9 15 1782 0000078C 0F7E52B8 movd _V(14), mm2 1783 00000790 0F0F45349E pfadd mm0, A(13) ; 9+13 15 1784 00000795 0F0FC29E pfadd mm0, mm2 ; 9+13+15 1785 00000799 0F7E4288 movd _V(2), mm0 1786 0000079D 0F0F552C9E pfadd mm2, A(11) ; 9+13+15 11+15 1787 000007A2 0F0F45389E pfadd mm0, A(14) ; 9+13+14+15 1788 000007A7 0F7E52A8 movd _V(10), mm2 1789 000007AB 0FEFC7 pxor mm0, mm7 ;-9-13-14-15 1790 000007AE 0F0F55349E pfadd mm2, A(13) ;-9-13-14-15 11+13+15 1791 000007B3 0F6F7528 pmov mm6, A(10) ;-9-13-14-15 11+13+15 10 1792 000007B7 0F7E4208 movd _V(34), mm0 1793 000007BB 0F0FF6AE pfacc mm6, mm6 ;-9-13-14-15 11+13+15 10+11 1794 000007BF 0F7E5298 movd _V(6), mm2 1795 000007C3 0F0F45249E pfadd mm0, A(9) ;-13-14-15 11+13+15 10+11 1796 000007C8 0F0FC69A pfsub mm0, mm6 ;-10-11-13-14-15 1797 000007CC 0F7E4218 movd _V(38), mm0 1798 1799 ; V[46] = (tmp = -(A12 + A14 + A15)) - A08; 1800 1801 000007D0 0F6E4D30 movd mm1, _A(12) ; 12 1802 000007D4 0F0F4D389E pfadd mm1, A(14) ; 12+14 1803 000007D9 0F0F4D3C9E pfadd mm1, A(15) ; 12+14+15 1804 000007DE 0FEFCF pxor mm1, mm7 ; -12-14-15 1805 000007E1 0F0FF1AA pfsubr mm6, mm1 ; -12-14-15 -10-11-12-14-15 1806 000007E5 0F0F4D209A pfsub mm1, A(8) ; -8-12-14-15 1807 000007EA 0F7E4A38 movd _V(46), mm1 1808 1809 ; V[42] = tmp - A10 - A11; // abhängig vom Befehl drüber 1810 1811 000007EE 0F7E7228 movd _V(42), mm6 1812 1813 ; A00 = (S[ 0] - S[31]) * C[ 1]; 1814 ; A01 = (S[ 1] - S[30]) * C[ 3]; 1815 ; A02 = (S[ 3] - S[28]) * C[ 7]; 1816 ; A03 = (S[ 2] - S[29]) * C[ 5]; 1817 1818 000007F2 0F6F4178 pmov mm0, S(30) 1819 000007F6 0F6F4908 pmov mm1, S(2) 1820 turn mm0, mm6 1821 000007FA 0F62F0 <1> punpckldq %2, %1 1822 000007FD 0F6AC6 <1> punpckhdq %1, %2 1823 turn mm1, mm7 1824 00000800 0F62F9 <1> punpckldq %2, %1 1825 00000803 0F6ACF <1> punpckhdq %1, %2 1826 00000806 0F0F01AA pfsubr mm0, S(0) 1827 0000080A 0F0F49709A pfsub mm1, S(28) 1828 0000080F 0F0F4040B4 pfmul mm0, C01 1829 00000814 0F0F4848B4 pfmul mm1, C07 1830 00000819 
0F7F4500 pmov A(0), mm0 1831 0000081D 0F7F4D08 pmov A(2), mm1 1832 1833 ; A04 = (S[ 7] - S[24]) * C[15]; 1834 ; A05 = (S[ 6] - S[25]) * C[13]; 1835 ; A06 = (S[ 4] - S[27]) * C[ 9]; 1836 ; A07 = (S[ 5] - S[26]) * C[11]; 1837 1838 00000821 0F6F4118 pmov mm0, S(6) 1839 00000825 0F6F4968 pmov mm1, S(26) 1840 turn mm0, mm6 1841 00000829 0F62F0 <1> punpckldq %2, %1 1842 0000082C 0F6AC6 <1> punpckhdq %1, %2 1843 turn mm1, mm7 1844 0000082F 0F62F9 <1> punpckldq %2, %1 1845 00000832 0F6ACF <1> punpckhdq %1, %2 1846 00000835 0F0F41609A pfsub mm0, S(24) 1847 0000083A 0F0F4910AA pfsubr mm1, S(4) 1848 0000083F 0F0F4050B4 pfmul mm0, C15 1849 00000844 0F0F4858B4 pfmul mm1, C09 1850 00000849 0F7F4510 pmov A(4), mm0 1851 0000084D 0F7F4D18 pmov A(6), mm1 1852 1853 ; A08 = (S[15] - S[16]) * C[31]; 1854 ; A09 = (S[14] - S[17]) * C[29]; 1855 ; A10 = (S[12] - S[19]) * C[25]; 1856 ; A11 = (S[13] - S[18]) * C[27]; 1857 1858 00000851 0F6F4138 pmov mm0, S(14) 1859 00000855 0F6F4948 pmov mm1, S(18) 1860 turn mm0, mm6 1861 00000859 0F62F0 <1> punpckldq %2, %1 1862 0000085C 0F6AC6 <1> punpckhdq %1, %2 1863 turn mm1, mm7 1864 0000085F 0F62F9 <1> punpckldq %2, %1 1865 00000862 0F6ACF <1> punpckhdq %1, %2 1866 00000865 0F0F41409A pfsub mm0, S(16) 1867 0000086A 0F0F4930AA pfsubr mm1, S(12) 1868 0000086F 0F0F4060B4 pfmul mm0, C31 1869 00000874 0F0F4868B4 pfmul mm1, C25 1870 00000879 0F7F4520 pmov A(8), mm0 1871 0000087D 0F7F4D28 pmov A(10), mm1 1872 1873 ; A12 = (S[ 8] - S[23]) * C[17]; 1874 ; A13 = (S[ 9] - S[22]) * C[19]; 1875 ; A14 = (S[11] - S[20]) * C[23]; 1876 ; A15 = (S[10] - S[21]) * C[21]; 1877 1878 00000881 0F6F4158 pmov mm0, S(22) 1879 00000885 0F6F4928 pmov mm1, S(10) 1880 turn mm0, mm6 1881 00000889 0F62F0 <1> punpckldq %2, %1 1882 0000088C 0F6AC6 <1> punpckhdq %1, %2 1883 turn mm1, mm7 1884 0000088F 0F62F9 <1> punpckldq %2, %1 1885 00000892 0F6ACF <1> punpckhdq %1, %2 1886 00000895 0F0F4120AA pfsubr mm0, S(8) 1887 0000089A 0F0F49509A pfsub mm1, S(20) 1888 0000089F 0F0F4070B4 pfmul 
mm0, C17 1889 000008A4 0F0F4878B4 pfmul mm1, C23 1890 000008A9 0F7F4530 pmov A(12), mm0 1891 000008AD 0F7F4D38 pmov A(14), mm1 1892 1893 tu_was31 1894 <1> 1895 <1> 1896 <1> 1897 <1> 1898 <1> 1899 <1> 1900 000008B1 0F6F4500 <1> pmov mm0, A(0) 1901 000008B5 0F6F4D08 <1> pmov mm1, A(2) 1902 000008B9 0F0F45209E <1> pfadd mm0, A(8) 1903 000008BE 0F0F4D289E <1> pfadd mm1, A(10) 1904 000008C3 0F7F4540 <1> pmov B(0), mm0 1905 000008C7 0F7F4D48 <1> pmov B(2), mm1 1906 <1> 1907 <1> 1908 <1> 1909 <1> 1910 <1> 1911 <1> 1912 000008CB 0F6F4510 <1> pmov mm0, A(4) 1913 000008CF 0F6F4D18 <1> pmov mm1, A(6) 1914 000008D3 0F0F45309E <1> pfadd mm0, A(12) 1915 000008D8 0F0F4D389E <1> pfadd mm1, A(14) 1916 000008DD 0F7F4550 <1> pmov B(4), mm0 1917 000008E1 0F7F4D58 <1> pmov B(6), mm1 1918 <1> 1919 <1> 1920 <1> 1921 <1> 1922 <1> 1923 <1> 1924 000008E5 0F6F4500 <1> pmov mm0, A(0) 1925 000008E9 0F6F4D08 <1> pmov mm1, A(2) 1926 000008ED 0F0F45209A <1> pfsub mm0, A(8) 1927 000008F2 0F0F4D289A <1> pfsub mm1, A(10) 1928 000008F7 0F0F4020B4 <1> pfmul mm0, C02 1929 000008FC 0F0F4828B4 <1> pfmul mm1, C14 1930 00000901 0F7F4560 <1> pmov B(8), mm0 1931 00000905 0F7F4D68 <1> pmov B(10), mm1 1932 <1> 1933 <1> 1934 <1> 1935 <1> 1936 <1> 1937 <1> 1938 00000909 0F6F4510 <1> pmov mm0, A(4) 1939 0000090D 0F6F4D18 <1> pmov mm1, A(6) 1940 00000911 0F0F45309A <1> pfsub mm0, A(12) 1941 00000916 0F0F4D389A <1> pfsub mm1, A(14) 1942 0000091B 0F0F4030B4 <1> pfmul mm0, C30 1943 00000920 0F0F4838B4 <1> pfmul mm1, C18 1944 00000925 0F7F4570 <1> pmov B(12), mm0 1945 00000929 0F7F4D78 <1> pmov B(14), mm1 1946 tu_was32 1947 <1> 1948 <1> 1949 <1> 1950 <1> 1951 <1> 1952 <1> 1953 0000092D 0F6F4540 <1> pmov mm0, B(0) 1954 00000931 0F6F4D48 <1> pmov mm1, B(2) 1955 00000935 0F0F45509E <1> pfadd mm0, B(4) 1956 0000093A 0F0F4D589E <1> pfadd mm1, B(6) 1957 0000093F 0F7F4500 <1> pmov A(0), mm0 1958 00000943 0F7F4D08 <1> pmov A(2), mm1 1959 <1> 1960 <1> 1961 <1> 1962 <1> 1963 <1> 1964 <1> 1965 00000947 0F6F4540 <1> pmov mm0, 
B(0) 1966 0000094B 0F6F4D48 <1> pmov mm1, B(2) 1967 0000094F 0F0F45509A <1> pfsub mm0, B(4) 1968 00000954 0F0F4D589A <1> pfsub mm1, B(6) 1969 00000959 0F0F4010B4 <1> pfmul mm0, C04 1970 0000095E 0F0F4818B4 <1> pfmul mm1, C28 1971 00000963 0F7F4510 <1> pmov A(4), mm0 1972 00000967 0F7F4D18 <1> pmov A(6), mm1 1973 <1> 1974 <1> 1975 <1> 1976 <1> 1977 <1> 1978 <1> 1979 0000096B 0F6F4560 <1> pmov mm0, B(8) 1980 0000096F 0F6F4D68 <1> pmov mm1, B(10) 1981 00000973 0F0F45709E <1> pfadd mm0, B(12) 1982 00000978 0F0F4D789E <1> pfadd mm1, B(14) 1983 0000097D 0F7F4520 <1> pmov A(8), mm0 1984 00000981 0F7F4D28 <1> pmov A(10), mm1 1985 <1> 1986 <1> 1987 <1> 1988 <1> 1989 <1> 1990 <1> 1991 00000985 0F6F4560 <1> pmov mm0, B(8) 1992 00000989 0F6F4D68 <1> pmov mm1, B(10) 1993 0000098D 0F0F45709A <1> pfsub mm0, B(12) 1994 00000992 0F0F4D789A <1> pfsub mm1, B(14) 1995 00000997 0F0F4010B4 <1> pfmul mm0, C04 1996 0000099C 0F0F4818B4 <1> pfmul mm1, C28 1997 000009A1 0F7F4530 <1> pmov A(12), mm0 1998 000009A5 0F7F4D38 <1> pmov A(14), mm1 1999 <1> 2000 tu_was33 2001 <1> 2002 <1> 2003 <1> 2004 <1> 2005 <1> 2006 <1> 2007 000009A9 0F6F7008 <1> pmov mm6, C08 2008 <1> 2009 000009AD 0F6F4500 <1> pmov mm0, A(0) 2010 000009B1 0F6F4D08 <1> pmov mm1, A(2) 2011 000009B5 0F6FD0 <1> pmov mm2, mm0 2012 000009B8 0F0FC19A <1> pfsub mm0, mm1 2013 000009BC 0F0FD19E <1> pfadd mm2, mm1 2014 000009C0 0F0FC6B4 <1> pfmul mm0, mm6 2015 000009C4 0F7F5540 <1> pmov B(0), mm2 2016 000009C8 0F7F4548 <1> pmov B(2), mm0 2017 <1> 2018 <1> 2019 <1> 2020 <1> 2021 <1> 2022 <1> 2023 000009CC 0F6F4510 <1> pmov mm0, A(4) 2024 000009D0 0F6F4D18 <1> pmov mm1, A(6) 2025 000009D4 0F6FD0 <1> pmov mm2, mm0 2026 000009D7 0F0FC19A <1> pfsub mm0, mm1 2027 000009DB 0F0FD19E <1> pfadd mm2, mm1 2028 000009DF 0F0FC6B4 <1> pfmul mm0, mm6 2029 000009E3 0F7F5550 <1> pmov B(4), mm2 2030 000009E7 0F7F4558 <1> pmov B(6), mm0 2031 <1> 2032 <1> 2033 <1> 2034 <1> 2035 <1> 2036 <1> 2037 000009EB 0F6F4520 <1> pmov mm0, A(8) 2038 000009EF 0F6F4D28 <1> 
pmov mm1, A(10) 2039 000009F3 0F6FD0 <1> pmov mm2, mm0 2040 000009F6 0F0FC19A <1> pfsub mm0, mm1 2041 000009FA 0F0FD19E <1> pfadd mm2, mm1 2042 000009FE 0F0FC6B4 <1> pfmul mm0, mm6 2043 00000A02 0F7F5560 <1> pmov B(8), mm2 2044 00000A06 0F7F4568 <1> pmov B(10), mm0 2045 <1> 2046 <1> 2047 <1> 2048 <1> 2049 <1> 2050 <1> 2051 00000A0A 0F6F4530 <1> pmov mm0, A(12) 2052 00000A0E 0F6F4D38 <1> pmov mm1, A(14) 2053 00000A12 0F6FD0 <1> pmov mm2, mm0 2054 00000A15 0F0FC19A <1> pfsub mm0, mm1 2055 00000A19 0F0FD19E <1> pfadd mm2, mm1 2056 00000A1D 0F0FC6B4 <1> pfmul mm0, mm6 2057 00000A21 0F7F5570 <1> pmov B(12), mm2 2058 00000A25 0F7F4578 <1> pmov B(14), mm0 2059 <1> 2060 tu_was34 2061 <1> 2062 <1> 2063 <1> 2064 <1> 2065 <1> 2066 <1> 2067 00000A29 0F6F30 <1> pmov mm6, C16 2068 <1> 2069 00000A2C 0F6F4540 <1> pmov mm0, B(0) 2070 00000A30 0F6F4D48 <1> pmov mm1, B(2) 2071 00000A34 0F6FD0 <1> pmov mm2, mm0 2072 00000A37 0F6FD9 <1> pmov mm3, mm1 2073 00000A3A 0F0FC6B4 <1> pfmul mm0, mm6 2074 00000A3E 0F0FCEB4 <1> pfmul mm1, mm6 2075 00000A42 0F0FD0AE <1> pfacc mm2, mm0 2076 00000A46 0F0FD9AE <1> pfacc mm3, mm1 2077 00000A4A 0F7F5500 <1> pmov A(0), mm2 2078 00000A4E 0F7F5D08 <1> pmov A(2), mm3 2079 <1> 2080 <1> 2081 <1> 2082 <1> 2083 <1> 2084 <1> 2085 00000A52 0F6F4550 <1> pmov mm0, B(4) 2086 00000A56 0F6F4D58 <1> pmov mm1, B(6) 2087 00000A5A 0F6FD0 <1> pmov mm2, mm0 2088 00000A5D 0F6FD9 <1> pmov mm3, mm1 2089 00000A60 0F0FC6B4 <1> pfmul mm0, mm6 2090 00000A64 0F0FCEB4 <1> pfmul mm1, mm6 2091 00000A68 0F0FD0AE <1> pfacc mm2, mm0 2092 00000A6C 0F0FD9AE <1> pfacc mm3, mm1 2093 00000A70 0F7F5510 <1> pmov A(4), mm2 2094 00000A74 0F7F5D18 <1> pmov A(6), mm3 2095 <1> 2096 <1> 2097 <1> 2098 <1> 2099 <1> 2100 <1> 2101 00000A78 0F6F4560 <1> pmov mm0, B(8) 2102 00000A7C 0F6F4D68 <1> pmov mm1, B(10) 2103 00000A80 0F6FD0 <1> pmov mm2, mm0 2104 00000A83 0F6FD9 <1> pmov mm3, mm1 2105 00000A86 0F0FC6B4 <1> pfmul mm0, mm6 2106 00000A8A 0F0FCEB4 <1> pfmul mm1, mm6 2107 00000A8E 0F0FD0AE <1> pfacc 
mm2, mm0 2108 00000A92 0F0FD9AE <1> pfacc mm3, mm1 2109 00000A96 0F7F5520 <1> pmov A(8), mm2 2110 00000A9A 0F7F5D28 <1> pmov A(10), mm3 2111 <1> 2112 <1> 2113 <1> 2114 <1> 2115 <1> 2116 <1> 2117 00000A9E 0F6F4570 <1> pmov mm0, B(12) 2118 00000AA2 0F6F4D78 <1> pmov mm1, B(14) 2119 00000AA6 0F6FD0 <1> pmov mm2, mm0 2120 00000AA9 0F6FD9 <1> pmov mm3, mm1 2121 00000AAC 0F0FC6B4 <1> pfmul mm0, mm6 2122 00000AB0 0F0FCEB4 <1> pfmul mm1, mm6 2123 00000AB4 0F0FD0AE <1> pfacc mm2, mm0 2124 00000AB8 0F0FD9AE <1> pfacc mm3, mm1 2125 00000ABC 0F7F5530 <1> pmov A(12), mm2 2126 00000AC0 0F7F5D38 <1> pmov A(14), mm3 2127 <1> 2128 2129 00000AC4 0F6F3D[00090000] pmov mm7, qword [negativ] 2130 2131 ; V[ 5] = (V[11] = (V[13] = A07 + (V[15] = A15)) + A11) + A05 + A13; 2132 2133 00000ACB 0F6E453C movd mm0, _A(15) 2134 00000ACF 0F7E42BC movd _V(15), mm0 2135 00000AD3 0F0F451C9E pfadd mm0, A(7) 2136 00000AD8 0F7E42B4 movd _V(13), mm0 2137 00000ADC 0F0F452C9E pfadd mm0, A(11) 2138 00000AE1 0F7E42AC movd _V(11), mm0 2139 00000AE5 0F0F45149E pfadd mm0, A(5) 2140 00000AEA 0F0F45349E pfadd mm0, A(13) 2141 00000AEF 0F7E4294 movd _V(5), mm0 2142 2143 ; V[ 7] = (V[ 9] = A03 + A11 + A15) + A13; 2144 2145 00000AF3 0F6E4D0C movd mm1, _A(3) 2146 00000AF7 0F0F4D2C9E pfadd mm1, A(11) 2147 00000AFC 0F0F4D3C9E pfadd mm1, A(15) 2148 00000B01 0F7E4AA4 movd _V(9), mm1 2149 00000B05 0F0F4D349E pfadd mm1, A(13) 2150 00000B0A 0F7E4A9C movd _V(7), mm1 2151 2152 ; V[33] = -(V[ 1] = A01 + A09 + A13 + A15) - A14; 2153 2154 00000B0E 0F6E6524 movd mm4, _A(9) 2155 00000B12 0F0F65349E pfadd mm4, A(13) 2156 00000B17 0F0F653C9E pfadd mm4, A(15) 2157 00000B1C 0F6E5504 movd mm2, _A(1) 2158 00000B20 0F0FD49E pfadd mm2, mm4 2159 00000B24 0F7E5284 movd _V(1), mm2 2160 00000B28 0F0F55389E pfadd mm2, A(14) 2161 00000B2D 0FEFD7 pxor mm2, mm7 2162 00000B30 0F7E5204 movd _V(33), mm2 2163 2164 ; V[35] = -(V[ 3] = A05 + A07 + A09 + A13 + A15) - A06 - A14; 2165 2166 00000B34 0F0F65149E pfadd mm4, A(5) 2167 00000B39 0F0F651C9E pfadd 
mm4, A(7) 2168 00000B3E 0F7E628C movd _V(3), mm4 2169 00000B42 0FEFE7 pxor mm4, mm7 2170 00000B45 0F0F65189A pfsub mm4, A(6) 2171 00000B4A 0F0F65389A pfsub mm4, A(14) 2172 00000B4F 0F7E620C movd _V(35), mm4 2173 2174 ; V[37] = (tmp = -(A10 + A11 + A13 + A14 + A15)) - A05 - A06 - A07; 2175 2176 00000B53 0F6F4D28 pmov mm1, A(10) 2177 00000B57 0F6F5538 pmov mm2, A(14) 2178 00000B5B 0F0FCAAE pfacc mm1, mm2 2179 00000B5F 0F0FC9AE pfacc mm1, mm1 2180 00000B63 0F0F4D349E pfadd mm1, A(13) 2181 00000B68 0FEFCF pxor mm1, mm7 2182 00000B6B 0F6F6518 pmov mm4, A(6) 2183 00000B6F 0F6FF1 pmov mm6, mm1 2184 00000B72 0F0FE4AE pfacc mm4, mm4 2185 00000B76 0F0F4D149A pfsub mm1, A(5) 2186 00000B7B 0F0FCC9A pfsub mm1, mm4 2187 00000B7F 0F7E4A14 movd _V(37), mm1 2188 2189 ; V[39] = tmp - A02 - A03; // abhängig vom Befehl drüber 2190 2191 00000B83 0F6F5D08 pmov mm3, A(2) 2192 00000B87 0F6FD6 pmov mm2, mm6 2193 00000B8A 0F0FDBAE pfacc mm3, mm3 2194 00000B8E 0F0FD39A pfsub mm2, mm3 2195 00000B92 0F7E521C movd _V(39), mm2 2196 2197 ; V[41] = (tmp += A13 - A12) - A02 - A03; // abhängig vom Befehl 2 drüber 2198 2199 00000B96 0F0F75349E pfadd mm6, A(13) 2200 00000B9B 0F0F75309A pfsub mm6, A(12) 2201 00000BA0 0F0FDEAA pfsubr mm3, mm6 2202 00000BA4 0F7E5A24 movd _V(41), mm3 2203 2204 ; V[43] = tmp - A04 - A06 - A07; // abhängig von Befehlen 1 und 3 drüber 2205 2206 00000BA8 0F6E6D10 movd mm5, _A(4) 2207 00000BAC 0F0FEC9E pfadd mm5, mm4 2208 00000BB0 0F0FF59A pfsub mm6, mm5 2209 00000BB4 0F7E722C movd _V(43), mm6 2210 2211 ; V[47] = (tmp = -(A08 + A12 + A14 + A15)) - A00; 2212 2213 00000BB8 0F6E4D20 movd mm1, _A(8) 2214 00000BBC 0F0F4D309E pfadd mm1, A(12) 2215 00000BC1 0F0F4D389E pfadd mm1, A(14) 2216 00000BC6 0F0F4D3C9E pfadd mm1, A(15) 2217 00000BCB 0FEFCF pxor mm1, mm7 2218 00000BCE 0F6FF1 pmov mm6, mm1 2219 00000BD1 0F0F4D009A pfsub mm1, A(0) 2220 00000BD6 0F7E4A3C movd _V(47), mm1 2221 2222 ; V[45] = tmp - A04 - A06 - A07; // abhängig vom Befehl drüber 2223 2224 00000BDA 0F0FF59A pfsub mm6, 
mm5 2225 00000BDE 0F7E7234 movd _V(45), mm6 2226 2227 00000BE2 89DC mov esp, ebx 2228 2229 ; V[32] = -V[ 0]; 2230 ; V[31] = -V[ 1]; 2231 ; V[30] = -V[ 2]; 2232 ; V[29] = -V[ 3]; 2233 ; V[28] = -V[ 4]; 2234 ; V[27] = -V[ 5]; 2235 ; V[26] = -V[ 6]; 2236 ; V[25] = -V[ 7]; 2237 ; V[24] = -V[ 8]; 2238 ; V[23] = -V[ 9]; 2239 ; V[22] = -V[10]; 2240 ; V[21] = -V[11]; 2241 ; V[20] = -V[12]; 2242 ; V[19] = -V[13]; 2243 ; V[18] = -V[14]; 2244 ; V[17] = -V[15]; 2245 2246 00000BE4 B900000080 mov ecx, 80000000h 2247 invcopy2 32, 0 2248 00000BE9 8B4280 <1> mov eax, _V(%2) 2249 00000BEC 8B5A84 <1> mov ebx, _V(%2+1) 2250 00000BEF 01C8 <1> add eax, ecx 2251 00000BF1 01CB <1> add ebx, ecx 2252 <1> 2253 <1> 2254 00000BF3 8902 <1> mov _V(%1), eax 2255 00000BF5 895AFC <1> mov _V(%1-1), ebx 2256 invcopy2 30, 2 2257 00000BF8 8B4288 <1> mov eax, _V(%2) 2258 00000BFB 8B5A8C <1> mov ebx, _V(%2+1) 2259 00000BFE 01C8 <1> add eax, ecx 2260 00000C00 01CB <1> add ebx, ecx 2261 <1> 2262 <1> 2263 00000C02 8942F8 <1> mov _V(%1), eax 2264 00000C05 895AF4 <1> mov _V(%1-1), ebx 2265 invcopy2 28, 4 2266 00000C08 8B4290 <1> mov eax, _V(%2) 2267 00000C0B 8B5A94 <1> mov ebx, _V(%2+1) 2268 00000C0E 01C8 <1> add eax, ecx 2269 00000C10 01CB <1> add ebx, ecx 2270 <1> 2271 <1> 2272 00000C12 8942F0 <1> mov _V(%1), eax 2273 00000C15 895AEC <1> mov _V(%1-1), ebx 2274 invcopy2 26, 6 2275 00000C18 8B4298 <1> mov eax, _V(%2) 2276 00000C1B 8B5A9C <1> mov ebx, _V(%2+1) 2277 00000C1E 01C8 <1> add eax, ecx 2278 00000C20 01CB <1> add ebx, ecx 2279 <1> 2280 <1> 2281 00000C22 8942E8 <1> mov _V(%1), eax 2282 00000C25 895AE4 <1> mov _V(%1-1), ebx 2283 invcopy2 24, 8 2284 00000C28 8B42A0 <1> mov eax, _V(%2) 2285 00000C2B 8B5AA4 <1> mov ebx, _V(%2+1) 2286 00000C2E 01C8 <1> add eax, ecx 2287 00000C30 01CB <1> add ebx, ecx 2288 <1> 2289 <1> 2290 00000C32 8942E0 <1> mov _V(%1), eax 2291 00000C35 895ADC <1> mov _V(%1-1), ebx 2292 invcopy2 22, 10 2293 00000C38 8B42A8 <1> mov eax, _V(%2) 2294 00000C3B 8B5AAC <1> mov ebx, _V(%2+1) 
2295 00000C3E 01C8 <1> add eax, ecx 2296 00000C40 01CB <1> add ebx, ecx 2297 <1> 2298 <1> 2299 00000C42 8942D8 <1> mov _V(%1), eax 2300 00000C45 895AD4 <1> mov _V(%1-1), ebx 2301 invcopy2 20, 12 2302 00000C48 8B42B0 <1> mov eax, _V(%2) 2303 00000C4B 8B5AB4 <1> mov ebx, _V(%2+1) 2304 00000C4E 01C8 <1> add eax, ecx 2305 00000C50 01CB <1> add ebx, ecx 2306 <1> 2307 <1> 2308 00000C52 8942D0 <1> mov _V(%1), eax 2309 00000C55 895ACC <1> mov _V(%1-1), ebx 2310 invcopy2 18, 14 2311 00000C58 8B42B8 <1> mov eax, _V(%2) 2312 00000C5B 8B5ABC <1> mov ebx, _V(%2+1) 2313 00000C5E 01C8 <1> add eax, ecx 2314 00000C60 01CB <1> add ebx, ecx 2315 <1> 2316 <1> 2317 00000C62 8942C8 <1> mov _V(%1), eax 2318 00000C65 895AC4 <1> mov _V(%1-1), ebx 2319 2320 ; V[63] = V[33]; 2321 ; V[62] = V[34]; 2322 ; V[61] = V[35]; 2323 ; V[60] = V[36]; 2324 ; V[59] = V[37]; 2325 ; V[58] = V[38]; 2326 ; V[57] = V[39]; 2327 ; V[56] = V[40]; 2328 ; V[55] = V[41]; 2329 ; V[54] = V[42]; 2330 ; V[53] = V[43]; 2331 ; V[52] = V[44]; 2332 ; V[51] = V[45]; 2333 ; V[50] = V[46]; 2334 ; V[49] = V[47]; 2335 2336 00000C68 81C284000000 add edx, 33*4 2337 copy2 30, 0 2338 00000C6E 8B4280 <1> mov eax, _V(%2) 2339 00000C71 8B5A84 <1> mov ebx, _V(%2+1) 2340 00000C74 8942F8 <1> mov _V(%1), eax 2341 00000C77 895AF4 <1> mov _V(%1-1), ebx 2342 copy2 28, 2 2343 00000C7A 8B4288 <1> mov eax, _V(%2) 2344 00000C7D 8B5A8C <1> mov ebx, _V(%2+1) 2345 00000C80 8942F0 <1> mov _V(%1), eax 2346 00000C83 895AEC <1> mov _V(%1-1), ebx 2347 copy2 26, 4 2348 00000C86 8B4290 <1> mov eax, _V(%2) 2349 00000C89 8B5A94 <1> mov ebx, _V(%2+1) 2350 00000C8C 8942E8 <1> mov _V(%1), eax 2351 00000C8F 895AE4 <1> mov _V(%1-1), ebx 2352 copy2 24, 6 2353 00000C92 8B4298 <1> mov eax, _V(%2) 2354 00000C95 8B5A9C <1> mov ebx, _V(%2+1) 2355 00000C98 8942E0 <1> mov _V(%1), eax 2356 00000C9B 895ADC <1> mov _V(%1-1), ebx 2357 copy2 22, 8 2358 00000C9E 8B42A0 <1> mov eax, _V(%2) 2359 00000CA1 8B5AA4 <1> mov ebx, _V(%2+1) 2360 00000CA4 8942D8 <1> mov _V(%1), eax 2361 
00000CA7 895AD4 <1> mov _V(%1-1), ebx 2362 copy2 20, 10 2363 00000CAA 8B42A8 <1> mov eax, _V(%2) 2364 00000CAD 8B5AAC <1> mov ebx, _V(%2+1) 2365 00000CB0 8942D0 <1> mov _V(%1), eax 2366 00000CB3 895ACC <1> mov _V(%1-1), ebx 2367 copy2 18, 12 2368 00000CB6 8B42B0 <1> mov eax, _V(%2) 2369 00000CB9 8B5AB4 <1> mov ebx, _V(%2+1) 2370 00000CBC 8942C8 <1> mov _V(%1), eax 2371 00000CBF 895AC4 <1> mov _V(%1-1), ebx 2372 copy1 16, 14 2373 00000CC2 8B42B8 <1> mov eax, _V(%2) 2374 00000CC5 8942C0 <1> mov _V(%1), eax 2375 2376 00000CC8 5D pop ebp 2377 00000CC9 5B pop ebx 2378 endproc 2379 <1> %ifnctx proc 2380 <1> %error expected 'proc' before 'endproc'. 2381 <1> %else 2382 <1> %if %$STACK > 0 2383 <1> add esp, %$STACK 2384 <1> %endif 2385 <1> 2386 <1> %if %$STACK <> (-%$STACKN) 2387 <1> %error STACKLEVEL mismatch check 'local', 'alloc', 'pushd', 'popd' 2388 <1> %endif 2389 <1> 2390 00000CCA C3 <1> ret 2391 <1> %pop 2392 <1> %endif 2393 ;**************************************************************************** 2394 ; tu_wasS -- butterfly stages shared by New_V_Helper2 and New_V_Helper3, which both expand it after preparing their inputs. ; In: xmm0 = A00..A03, xmm1 = A04..A07, xmm4 = A08..A11, xmm5 = A12..A15 (four packed singles each). ; Out: the final A00..A15 are written with movaps to [edx+0] .. [edx+63], so edx must be 16-byte aligned. ; The CCxx / CMxxxxxx / CPxxxxxx operands assemble to [eax+disp8] (see the opcode bytes in the expansions), so eax must hold the table base the callers load with "mov eax, C". ; Overwrites xmm0-xmm7. 2395 %macro tu_wasS 0 2396 2397 ; B00 = A00 + A08; 2398 ; B01 = A01 + A09; 2399 ; B02 = A02 + A10; 2400 ; B03 = A03 + A11; 2401 ; B04 = A04 + A12; 2402 ; B05 = A05 + A13; 2403 ; B06 = A06 + A14; 2404 ; B07 = A07 + A15; 2405 2406 movaps xmm2, xmm0 2407 movaps xmm3, xmm1 2408 addps xmm0, xmm4 2409 addps xmm1, xmm5 2410 2411 ; B08 = (A00 - A08) * C[ 2]; 2412 ; B09 = (A01 - A09) * C[ 6]; 2413 ; B10 = (A02 - A10) * C[14]; 2414 ; B11 = (A03 - A11) * C[10]; 2415 ; B12 = (A04 - A12) * C[30]; 2416 ; B13 = (A05 - A13) * C[26]; 2417 ; B14 = (A06 - A14) * C[18]; 2418 ; B15 = (A07 - A15) * C[22]; 2419 2420 subps xmm2, xmm4 2421 subps xmm3, xmm5 2422 mulps xmm2, CC02 2423 mulps xmm3, CC30 2424 2425 ; A00 = B00 + B04; 2426 ; A01 = B01 + B05; 2427 ; A02 = B02 + B06; 2428 ; A03 = B03 + B07; 2429 ; A04 = (B00 - B04) * C[ 4]; 2430 ; A05 = (B01 - B05) * C[12]; 2431 ; A06 = (B02 - B06) * C[28]; 2432 ; A07 = (B03 - B07) * C[20]; 2433 2434 movaps xmm5, xmm0 2435 movaps xmm4, xmm0 2436 
subps xmm5, xmm1 2437 addps xmm4, xmm1 2438 mulps xmm5, CC04 2439 2440 ; A08 = B08 + B12; 2441 ; A09 = B09 + B13; 2442 ; A10 = B10 + B14; 2443 ; A11 = B11 + B15; 2444 ; A12 = (B08 - B12) * C[ 4]; 2445 ; A13 = (B09 - B13) * C[12]; 2446 ; A14 = (B10 - B14) * C[28]; 2447 ; A15 = (B11 - B15) * C[20]; 2448 2449 movaps xmm7, xmm2 2450 movaps xmm6, xmm2 2451 subps xmm7, xmm3 2452 addps xmm6, xmm3 2453 mulps xmm7, CC04 2454 2455 ; B00 = A00 + A02; B00 = A00 * 1 + A02 * 1 2456 ; B01 = A01 + A03; B01 = A01 * 1 + A03 * 1 2457 ; B02 = (A00 - A02) * C[ 8]; B02 = A02 * -C8 + A00 * C8 2458 ; B03 = (A01 - A03) * C[24]; B03 = A03 * -C24 + A01 * C24 2459 ; B04 = A04 + A06; 2460 ; B05 = A05 + A07; 2461 ; B06 = (A04 - A06) * C[ 8]; 2462 ; B07 = (A05 - A07) * C[24]; 2463 ; B08 = A08 + A10; 2464 ; B09 = A09 + A11; 2465 ; B10 = (A08 - A10) * C[ 8]; 2466 ; B11 = (A09 - A11) * C[24]; 2467 ; B12 = A12 + A14; 2468 ; B13 = A13 + A15; 2469 ; B14 = (A12 - A14) * C[ 8]; 2470 ; B15 = (A13 - A15) * C[24]; 2471 2472 movaps xmm0, xmm4 2473 shufps xmm4, xmm4, 0x4E ; 4#1032# = 0x4E 2474 mulps xmm0, CM110824 2475 mulps xmm4, CP110824 2476 addps xmm0, xmm4 2477 2478 movaps xmm1, xmm5 2479 shufps xmm5, xmm5, 0x4E ; 4#1032# = 0x4E 2480 mulps xmm1, CM110824 2481 mulps xmm5, CP110824 2482 addps xmm1, xmm5 2483 2484 movaps xmm2, xmm6 2485 shufps xmm6, xmm6, 0x4E ; 4#1032# = 0x4E 2486 mulps xmm2, CM110824 2487 mulps xmm6, CP110824 2488 addps xmm2, xmm6 2489 2490 movaps xmm3, xmm7 2491 shufps xmm7, xmm7, 0x4E ; 4#1032# = 0x4E 2492 mulps xmm3, CM110824 2493 mulps xmm7, CP110824 2494 addps xmm3, xmm7 2495 2496 ; A00 = B00 + B01; A00 = B00 * 1 + B01 * 1 2497 ; A01 = (B00 - B01) * C[16]; A01 = B01 * -C16 + B00 * C16 2498 ; A02 = B02 + B03; A02 = B02 * 1 + B03 * 1 2499 ; A03 = (B02 - B03) * C[16]; A03 = B03 * -C16 + B02 * C16 2500 ; A04 = B04 + B05; 2501 ; A05 = (B04 - B05) * C[16]; 2502 ; A06 = B06 + B07; 2503 ; A07 = (B06 - B07) * C[16]; 2504 ; A08 = B08 + B09; 2505 ; A09 = (B08 - B09) * C[16]; 2506 ; A10 = B10 + 
B11; 2507 ; A11 = (B10 - B11) * C[16]; 2508 ; A12 = B12 + B13; 2509 ; A13 = (B12 - B13) * C[16]; 2510 ; A14 = B14 + B15; 2511 ; A15 = (B14 - B15) * C[16]; 2512 2513 movaps xmm4, xmm0 2514 shufps xmm0, xmm0, 0xB1 ; 4#2301# = 0xB1 2515 mulps xmm4, CM116116 2516 mulps xmm0, CP116116 2517 addps xmm4, xmm0 2518 2519 movaps xmm5, xmm1 2520 shufps xmm1, xmm1, 0xB1 ; 4#2301# = 0xB1 2521 mulps xmm5, CM116116 2522 mulps xmm1, CP116116 2523 addps xmm5, xmm1 2524 2525 movaps xmm6, xmm2 2526 shufps xmm2, xmm2, 0xB1 ; 4#2301# = 0xB1 2527 mulps xmm6, CM116116 2528 mulps xmm2, CP116116 2529 addps xmm6, xmm2 2530 2531 movaps xmm7, xmm3 2532 shufps xmm3, xmm3, 0xB1 ; 4#2301# = 0xB1 2533 mulps xmm7, CM116116 2534 mulps xmm3, CP116116 2535 addps xmm7, xmm3 2536 2537 ; Store 2538 2539 movaps [edx+4* 0], xmm4 2540 movaps [edx+4* 4], xmm5 2541 movaps [edx+4* 8], xmm6 2542 movaps [edx+4*12], xmm7 2543 %endmacro 2544 ; New_V_Helper2(A, Sample) -- stack arguments: A at [esp+4] (loaded into edx), Sample at [esp+8] (loaded into ecx). ; Reads Sample[0..31] with movaps, forms the 16 pairwise sums listed below, then expands tu_wasS, which stores 16 floats at A. ; Every memory access is movaps, so both pointers must be 16-byte aligned. ; Overwrites eax (set to the table base C), ecx, edx and xmm0-xmm7; returns with a plain ret. 2545 00000CCB 90 align 32 2546 proc New_V_Helper2 2547 <1> %push proc 2548 <1> global _%1 2549 <1> global %1 2550 <1> _%1: 2551 <1> %1: 2552 <1> %assign %$STACK 0 2553 <1> %assign %$STACKN 0 2554 <1> %assign %$ARG 4 2555 $A6 arg 4 2556 <1> $A6 equ %$ARG 2557 <1> %assign %$ARG %$ARG+%1 2558 $Sample6 arg 4 2559 <1> $Sample6 equ %$ARG 2560 <1> %assign %$ARG %$ARG+%1 2561 00000CE0 8B4C2408 mov ecx, [sp($Sample6)] 2562 00000CE4 8B542404 mov edx, [sp($A6)] 2563 00000CE8 B8[60080000] mov eax, C 2564 2565 ; A[ 0] = Sample[ 0] + Sample[31]; 2566 ; A[ 1] = Sample[ 1] + Sample[30]; 2567 ; A[ 2] = Sample[ 3] + Sample[28]; 2568 ; A[ 3] = Sample[ 2] + Sample[29]; 2569 ; A[ 4] = Sample[ 7] + Sample[24]; 2570 ; A[ 5] = Sample[ 6] + Sample[25]; 2571 ; A[ 6] = Sample[ 4] + Sample[27]; 2572 ; A[ 7] = Sample[ 5] + Sample[26]; 2573 ; A[ 8] = Sample[15] + Sample[16]; 2574 ; A[ 9] = Sample[14] + Sample[17]; 2575 ; A[10] = Sample[12] + Sample[19]; 2576 ; A[11] = Sample[13] + Sample[18]; 2577 ; A[12] = Sample[ 8] + Sample[23]; 2578 ; A[13] = Sample[ 9] + Sample[22]; 2579 ; A[14] = Sample[11] + 
Sample[20]; 2580 ; A[15] = Sample[10] + Sample[21]; 2581 2582 00000CED 0F2801 movaps xmm0, [ecx+ 0] 2583 00000CF0 0FC6C0B4 shufps xmm0, xmm0, 0xB4 ; 4#2310# = 0xB4 2584 00000CF4 0F284910 movaps xmm1, [ecx+ 16] 2585 00000CF8 0FC6C94B shufps xmm1, xmm1, 0x4B ; 4#1023# = 0x4B 2586 00000CFC 0F285120 movaps xmm2, [ecx+ 32] 2587 00000D00 0FC6D2B4 shufps xmm2, xmm2, 0xB4 ; 4#2310# = 0xB4 2588 00000D04 0F285930 movaps xmm3, [ecx+ 48] 2589 00000D08 0FC6DB4B shufps xmm3, xmm3, 0x4B ; 4#1023# = 0x4B 2590 00000D0C 0F286140 movaps xmm4, [ecx+ 64] 2591 00000D10 0FC6E4B4 shufps xmm4, xmm4, 0xB4 ; 4#2310# = 0xB4 2592 00000D14 0F58E3 addps xmm4, xmm3 2593 00000D17 0F286950 movaps xmm5, [ecx+ 80] 2594 00000D1B 0FC6ED4B shufps xmm5, xmm5, 0x4B ; 4#1023# = 0x4B 2595 00000D1F 0F58EA addps xmm5, xmm2 2596 00000D22 0F287160 movaps xmm6, [ecx+ 96] 2597 00000D26 0FC6F6B4 shufps xmm6, xmm6, 0xB4 ; 4#2310# = 0xB4 2598 00000D2A 0F58CE addps xmm1, xmm6 2599 00000D2D 0F287970 movaps xmm7, [ecx+112] 2600 00000D31 0FC6FF4B shufps xmm7, xmm7, 0x4B ; 4#1023# = 0x4B 2601 00000D35 0F58C7 addps xmm0, xmm7 2602 2603 tu_wasS 2604 <1> 2605 <1> 2606 <1> 2607 <1> 2608 <1> 2609 <1> 2610 <1> 2611 <1> 2612 <1> 2613 <1> 2614 00000D38 0F28D0 <1> movaps xmm2, xmm0 2615 00000D3B 0F28D9 <1> movaps xmm3, xmm1 2616 00000D3E 0F58C4 <1> addps xmm0, xmm4 2617 00000D41 0F58CD <1> addps xmm1, xmm5 2618 <1> 2619 <1> 2620 <1> 2621 <1> 2622 <1> 2623 <1> 2624 <1> 2625 <1> 2626 <1> 2627 <1> 2628 00000D44 0F5CD4 <1> subps xmm2, xmm4 2629 00000D47 0F5CDD <1> subps xmm3, xmm5 2630 00000D4A 0F595020 <1> mulps xmm2, CC02 2631 00000D4E 0F595830 <1> mulps xmm3, CC30 2632 <1> 2633 <1> 2634 <1> 2635 <1> 2636 <1> 2637 <1> 2638 <1> 2639 <1> 2640 <1> 2641 <1> 2642 00000D52 0F28E8 <1> movaps xmm5, xmm0 2643 00000D55 0F28E0 <1> movaps xmm4, xmm0 2644 00000D58 0F5CE9 <1> subps xmm5, xmm1 2645 00000D5B 0F58E1 <1> addps xmm4, xmm1 2646 00000D5E 0F596810 <1> mulps xmm5, CC04 2647 <1> 2648 <1> 2649 <1> 2650 <1> 2651 <1> 2652 <1> 2653 <1> 2654 
<1> 2655 <1> 2656 <1> 2657 00000D62 0F28FA <1> movaps xmm7, xmm2 2658 00000D65 0F28F2 <1> movaps xmm6, xmm2 2659 00000D68 0F5CFB <1> subps xmm7, xmm3 2660 00000D6B 0F58F3 <1> addps xmm6, xmm3 2661 00000D6E 0F597810 <1> mulps xmm7, CC04 2662 <1> 2663 <1> 2664 <1> 2665 <1> 2666 <1> 2667 <1> 2668 <1> 2669 <1> 2670 <1> 2671 <1> 2672 <1> 2673 <1> 2674 <1> 2675 <1> 2676 <1> 2677 <1> 2678 <1> 2679 <1> 2680 00000D72 0F28C4 <1> movaps xmm0, xmm4 2681 00000D75 0FC6E44E <1> shufps xmm4, xmm4, 0x4E 2682 00000D79 0F5940C0 <1> mulps xmm0, CM110824 2683 00000D7D 0F5960D0 <1> mulps xmm4, CP110824 2684 00000D81 0F58C4 <1> addps xmm0, xmm4 2685 <1> 2686 00000D84 0F28CD <1> movaps xmm1, xmm5 2687 00000D87 0FC6ED4E <1> shufps xmm5, xmm5, 0x4E 2688 00000D8B 0F5948C0 <1> mulps xmm1, CM110824 2689 00000D8F 0F5968D0 <1> mulps xmm5, CP110824 2690 00000D93 0F58CD <1> addps xmm1, xmm5 2691 <1> 2692 00000D96 0F28D6 <1> movaps xmm2, xmm6 2693 00000D99 0FC6F64E <1> shufps xmm6, xmm6, 0x4E 2694 00000D9D 0F5950C0 <1> mulps xmm2, CM110824 2695 00000DA1 0F5970D0 <1> mulps xmm6, CP110824 2696 00000DA5 0F58D6 <1> addps xmm2, xmm6 2697 <1> 2698 00000DA8 0F28DF <1> movaps xmm3, xmm7 2699 00000DAB 0FC6FF4E <1> shufps xmm7, xmm7, 0x4E 2700 00000DAF 0F5958C0 <1> mulps xmm3, CM110824 2701 00000DB3 0F5978D0 <1> mulps xmm7, CP110824 2702 00000DB7 0F58DF <1> addps xmm3, xmm7 2703 <1> 2704 <1> 2705 <1> 2706 <1> 2707 <1> 2708 <1> 2709 <1> 2710 <1> 2711 <1> 2712 <1> 2713 <1> 2714 <1> 2715 <1> 2716 <1> 2717 <1> 2718 <1> 2719 <1> 2720 <1> 2721 00000DBA 0F28E0 <1> movaps xmm4, xmm0 2722 00000DBD 0FC6C0B1 <1> shufps xmm0, xmm0, 0xB1 2723 00000DC1 0F5960E0 <1> mulps xmm4, CM116116 2724 00000DC5 0F5940F0 <1> mulps xmm0, CP116116 2725 00000DC9 0F58E0 <1> addps xmm4, xmm0 2726 <1> 2727 00000DCC 0F28E9 <1> movaps xmm5, xmm1 2728 00000DCF 0FC6C9B1 <1> shufps xmm1, xmm1, 0xB1 2729 00000DD3 0F5968E0 <1> mulps xmm5, CM116116 2730 00000DD7 0F5948F0 <1> mulps xmm1, CP116116 2731 00000DDB 0F58E9 <1> addps xmm5, xmm1 2732 <1> 
2733 00000DDE 0F28F2 <1> movaps xmm6, xmm2 2734 00000DE1 0FC6D2B1 <1> shufps xmm2, xmm2, 0xB1 2735 00000DE5 0F5970E0 <1> mulps xmm6, CM116116 2736 00000DE9 0F5950F0 <1> mulps xmm2, CP116116 2737 00000DED 0F58F2 <1> addps xmm6, xmm2 2738 <1> 2739 00000DF0 0F28FB <1> movaps xmm7, xmm3 2740 00000DF3 0FC6DBB1 <1> shufps xmm3, xmm3, 0xB1 2741 00000DF7 0F5978E0 <1> mulps xmm7, CM116116 2742 00000DFB 0F5958F0 <1> mulps xmm3, CP116116 2743 00000DFF 0F58FB <1> addps xmm7, xmm3 2744 <1> 2745 <1> 2746 <1> 2747 00000E02 0F2922 <1> movaps [edx+4* 0], xmm4 2748 00000E05 0F296A10 <1> movaps [edx+4* 4], xmm5 2749 00000E09 0F297220 <1> movaps [edx+4* 8], xmm6 2750 00000E0D 0F297A30 <1> movaps [edx+4*12], xmm7 2751 endproc 2752 <1> %ifnctx proc 2753 <1> %error expected 'proc' before 'endproc'. 2754 <1> %else 2755 <1> %if %$STACK > 0 2756 <1> add esp, %$STACK 2757 <1> %endif 2758 <1> 2759 <1> %if %$STACK <> (-%$STACKN) 2760 <1> %error STACKLEVEL mismatch check 'local', 'alloc', 'pushd', 'popd' 2761 <1> %endif 2762 <1> 2763 00000E11 C3 <1> ret 2764 <1> %pop 2765 <1> %endif 2766 2767 ;********************************************************************************************* 2768 ; New_V_Helper3(A, Sample) -- stack arguments: A at [esp+4] (loaded into edx), Sample at [esp+8] (loaded into ecx). ; Reads Sample[0..31] with movaps, forms the 16 scaled differences listed below (subps followed by mulps with CM31 / CM17 / CC15 / CC01), then expands tu_wasS, which stores 16 floats at A. ; NOTE(review): for the CM31 and CM17 rows the subps operands are in the opposite order to the formula comments; presumably those table rows hold negated coefficients -- confirm against the definition of C. ; Every memory access is movaps, so both pointers must be 16-byte aligned. Overwrites eax (set to C), ecx, edx and xmm0-xmm7. 2769 00000E12 90 align 32 2770 proc New_V_Helper3 2771 <1> %push proc 2772 <1> global _%1 2773 <1> global %1 2774 <1> _%1: 2775 <1> %1: 2776 <1> %assign %$STACK 0 2777 <1> %assign %$STACKN 0 2778 <1> %assign %$ARG 4 2779 $A7 arg 4 2780 <1> $A7 equ %$ARG 2781 <1> %assign %$ARG %$ARG+%1 2782 $Sample7 arg 4 2783 <1> $Sample7 equ %$ARG 2784 <1> %assign %$ARG %$ARG+%1 2785 00000E20 8B4C2408 mov ecx, [sp($Sample7)] 2786 00000E24 8B542404 mov edx, [sp($A7)] 2787 00000E28 B8[60080000] mov eax, C 2788 2789 ; A[ 0] = (Sample[ 0] - Sample[31]) * C[ 1]; Sample[ 0] + Sample[31]; 2790 ; A[ 1] = (Sample[ 1] - Sample[30]) * C[ 3]; Sample[ 1] + Sample[30]; 2791 ; A[ 2] = (Sample[ 3] - Sample[28]) * C[ 7]; Sample[ 3] + Sample[28]; 2792 ; A[ 3] = (Sample[ 2] - Sample[29]) * C[ 5]; Sample[ 2] + Sample[29]; 2793 ; A[ 4] = 
(Sample[ 7] - Sample[24]) * C[15]; Sample[ 7] + Sample[24]; 2794 ; A[ 5] = (Sample[ 6] - Sample[25]) * C[13]; Sample[ 6] + Sample[25]; 2795 ; A[ 6] = (Sample[ 4] - Sample[27]) * C[ 9]; Sample[ 4] + Sample[27]; 2796 ; A[ 7] = (Sample[ 5] - Sample[26]) * C[11]; Sample[ 5] + Sample[26]; 2797 ; A[ 8] = (Sample[15] - Sample[16]) * C[31]; Sample[15] + Sample[16]; 2798 ; A[ 9] = (Sample[14] - Sample[17]) * C[29]; Sample[14] + Sample[17]; 2799 ; A[10] = (Sample[12] - Sample[19]) * C[25]; Sample[12] + Sample[19]; 2800 ; A[11] = (Sample[13] - Sample[18]) * C[27]; Sample[13] + Sample[18]; 2801 ; A[12] = (Sample[ 8] - Sample[23]) * C[17]; Sample[ 8] + Sample[23]; 2802 ; A[13] = (Sample[ 9] - Sample[22]) * C[19]; Sample[ 9] + Sample[22]; 2803 ; A[14] = (Sample[11] - Sample[20]) * C[23]; Sample[11] + Sample[20]; 2804 ; A[15] = (Sample[10] - Sample[21]) * C[21]; Sample[10] + Sample[21]; 2805 2806 00000E2D 0F2801 movaps xmm0, [ecx+ 0] 2807 00000E30 0FC6C0B4 shufps xmm0, xmm0, 0xB4 ; 4#2310# = 0xB4 2808 00000E34 0F284910 movaps xmm1, [ecx+ 16] 2809 00000E38 0FC6C94B shufps xmm1, xmm1, 0x4B ; 4#1023# = 0x4B 2810 00000E3C 0F285120 movaps xmm2, [ecx+ 32] 2811 00000E40 0FC6D2B4 shufps xmm2, xmm2, 0xB4 ; 4#2310# = 0xB4 2812 00000E44 0F285930 movaps xmm3, [ecx+ 48] 2813 00000E48 0FC6DB4B shufps xmm3, xmm3, 0x4B ; 4#1023# = 0x4B 2814 00000E4C 0F286140 movaps xmm4, [ecx+ 64] 2815 00000E50 0FC6E4B4 shufps xmm4, xmm4, 0xB4 ; 4#2310# = 0xB4 2816 00000E54 0F5CE3 subps xmm4, xmm3 2817 00000E57 0F5960A0 mulps xmm4, CM31 2818 00000E5B 0F286950 movaps xmm5, [ecx+ 80] 2819 00000E5F 0FC6ED4B shufps xmm5, xmm5, 0x4B ; 4#1023# = 0x4B 2820 00000E63 0F5CEA subps xmm5, xmm2 2821 00000E66 0F5968B0 mulps xmm5, CM17 2822 00000E6A 0F287160 movaps xmm6, [ecx+ 96] 2823 00000E6E 0FC6F6B4 shufps xmm6, xmm6, 0xB4 ; 4#2310# = 0xB4 2824 00000E72 0F5CCE subps xmm1, xmm6 2825 00000E75 0F594850 mulps xmm1, CC15 2826 00000E79 0F287970 movaps xmm7, [ecx+112] 2827 00000E7D 0FC6FF4B shufps xmm7, xmm7, 0x4B ; 4#1023# = 
0x4B 2828 00000E81 0F5CC7 subps xmm0, xmm7 2829 00000E84 0F594040 mulps xmm0, CC01 2830 2831 tu_wasS 2832 <1> 2833 <1> 2834 <1> 2835 <1> 2836 <1> 2837 <1> 2838 <1> 2839 <1> 2840 <1> 2841 <1> 2842 00000E88 0F28D0 <1> movaps xmm2, xmm0 2843 00000E8B 0F28D9 <1> movaps xmm3, xmm1 2844 00000E8E 0F58C4 <1> addps xmm0, xmm4 2845 00000E91 0F58CD <1> addps xmm1, xmm5 2846 <1> 2847 <1> 2848 <1> 2849 <1> 2850 <1> 2851 <1> 2852 <1> 2853 <1> 2854 <1> 2855 <1> 2856 00000E94 0F5CD4 <1> subps xmm2, xmm4 2857 00000E97 0F5CDD <1> subps xmm3, xmm5 2858 00000E9A 0F595020 <1> mulps xmm2, CC02 2859 00000E9E 0F595830 <1> mulps xmm3, CC30 2860 <1> 2861 <1> 2862 <1> 2863 <1> 2864 <1> 2865 <1> 2866 <1> 2867 <1> 2868 <1> 2869 <1> 2870 00000EA2 0F28E8 <1> movaps xmm5, xmm0 2871 00000EA5 0F28E0 <1> movaps xmm4, xmm0 2872 00000EA8 0F5CE9 <1> subps xmm5, xmm1 2873 00000EAB 0F58E1 <1> addps xmm4, xmm1 2874 00000EAE 0F596810 <1> mulps xmm5, CC04 2875 <1> 2876 <1> 2877 <1> 2878 <1> 2879 <1> 2880 <1> 2881 <1> 2882 <1> 2883 <1> 2884 <1> 2885 00000EB2 0F28FA <1> movaps xmm7, xmm2 2886 00000EB5 0F28F2 <1> movaps xmm6, xmm2 2887 00000EB8 0F5CFB <1> subps xmm7, xmm3 2888 00000EBB 0F58F3 <1> addps xmm6, xmm3 2889 00000EBE 0F597810 <1> mulps xmm7, CC04 2890 <1> 2891 <1> 2892 <1> 2893 <1> 2894 <1> 2895 <1> 2896 <1> 2897 <1> 2898 <1> 2899 <1> 2900 <1> 2901 <1> 2902 <1> 2903 <1> 2904 <1> 2905 <1> 2906 <1> 2907 <1> 2908 00000EC2 0F28C4 <1> movaps xmm0, xmm4 2909 00000EC5 0FC6E44E <1> shufps xmm4, xmm4, 0x4E 2910 00000EC9 0F5940C0 <1> mulps xmm0, CM110824 2911 00000ECD 0F5960D0 <1> mulps xmm4, CP110824 2912 00000ED1 0F58C4 <1> addps xmm0, xmm4 2913 <1> 2914 00000ED4 0F28CD <1> movaps xmm1, xmm5 2915 00000ED7 0FC6ED4E <1> shufps xmm5, xmm5, 0x4E 2916 00000EDB 0F5948C0 <1> mulps xmm1, CM110824 2917 00000EDF 0F5968D0 <1> mulps xmm5, CP110824 2918 00000EE3 0F58CD <1> addps xmm1, xmm5 2919 <1> 2920 00000EE6 0F28D6 <1> movaps xmm2, xmm6 2921 00000EE9 0FC6F64E <1> shufps xmm6, xmm6, 0x4E 2922 00000EED 0F5950C0 <1> 
mulps xmm2, CM110824 2923 00000EF1 0F5970D0 <1> mulps xmm6, CP110824 2924 00000EF5 0F58D6 <1> addps xmm2, xmm6 2925 <1> 2926 00000EF8 0F28DF <1> movaps xmm3, xmm7 2927 00000EFB 0FC6FF4E <1> shufps xmm7, xmm7, 0x4E 2928 00000EFF 0F5958C0 <1> mulps xmm3, CM110824 2929 00000F03 0F5978D0 <1> mulps xmm7, CP110824 2930 00000F07 0F58DF <1> addps xmm3, xmm7 2931 <1> 2932 <1> 2933 <1> 2934 <1> 2935 <1> 2936 <1> 2937 <1> 2938 <1> 2939 <1> 2940 <1> 2941 <1> 2942 <1> 2943 <1> 2944 <1> 2945 <1> 2946 <1> 2947 <1> 2948 <1> 2949 00000F0A 0F28E0 <1> movaps xmm4, xmm0 2950 00000F0D 0FC6C0B1 <1> shufps xmm0, xmm0, 0xB1 2951 00000F11 0F5960E0 <1> mulps xmm4, CM116116 2952 00000F15 0F5940F0 <1> mulps xmm0, CP116116 2953 00000F19 0F58E0 <1> addps xmm4, xmm0 2954 <1> 2955 00000F1C 0F28E9 <1> movaps xmm5, xmm1 2956 00000F1F 0FC6C9B1 <1> shufps xmm1, xmm1, 0xB1 2957 00000F23 0F5968E0 <1> mulps xmm5, CM116116 2958 00000F27 0F5948F0 <1> mulps xmm1, CP116116 2959 00000F2B 0F58E9 <1> addps xmm5, xmm1 2960 <1> 2961 00000F2E 0F28F2 <1> movaps xmm6, xmm2 2962 00000F31 0FC6D2B1 <1> shufps xmm2, xmm2, 0xB1 2963 00000F35 0F5970E0 <1> mulps xmm6, CM116116 2964 00000F39 0F5950F0 <1> mulps xmm2, CP116116 2965 00000F3D 0F58F2 <1> addps xmm6, xmm2 2966 <1> 2967 00000F40 0F28FB <1> movaps xmm7, xmm3 2968 00000F43 0FC6DBB1 <1> shufps xmm3, xmm3, 0xB1 2969 00000F47 0F5978E0 <1> mulps xmm7, CM116116 2970 00000F4B 0F5958F0 <1> mulps xmm3, CP116116 2971 00000F4F 0F58FB <1> addps xmm7, xmm3 2972 <1> 2973 <1> 2974 <1> 2975 00000F52 0F2922 <1> movaps [edx+4* 0], xmm4 2976 00000F55 0F296A10 <1> movaps [edx+4* 4], xmm5 2977 00000F59 0F297220 <1> movaps [edx+4* 8], xmm6 2978 00000F5D 0F297A30 <1> movaps [edx+4*12], xmm7 2979 endproc 2980 <1> %ifnctx proc 2981 <1> %error expected 'proc' before 'endproc'. 
2982 <1> %else 2983 <1> %if %$STACK > 0 2984 <1> add esp, %$STACK 2985 <1> %endif 2986 <1> 2987 <1> %if %$STACK <> (-%$STACKN) 2988 <1> %error STACKLEVEL mismatch check 'local', 'alloc', 'pushd', 'popd' 2989 <1> %endif 2990 <1> 2991 00000F61 C3 <1> ret 2992 <1> %pop 2993 <1> %endif 2994 2995 ;********************************************************************************************* 2996 ; New_V_Helper4(V) -- one stack argument, V at [esp+4] (loaded into edx). ; XORs V[0..15] with the mask at [negativ] and stores the result lane-reversed to V[17..32] (unaligned movups stores); then copies V[33..48] lane-reversed to V[48..63] (V[48] is rewritten with its own value). ; V must be 16-byte aligned for the movaps loads and stores. Overwrites edx and xmm0-xmm7. ; NOTE(review): [negativ] is presumably a sign-bit mask, as the V[..] = -V[..] formulas below imply -- its definition is not visible in this part of the file. 2997 00000F62 90 align 32 2998 proc New_V_Helper4 2999 <1> %push proc 3000 <1> global _%1 3001 <1> global %1 3002 <1> _%1: 3003 <1> %1: 3004 <1> %assign %$STACK 0 3005 <1> %assign %$STACKN 0 3006 <1> %assign %$ARG 4 3007 $V8 arg 4 3008 <1> $V8 equ %$ARG 3009 <1> %assign %$ARG %$ARG+%1 3010 00000F80 8B542404 mov edx,[sp($V8)] 3011 3012 ; V[32] = -V[ 0]; 3013 ; V[31] = -V[ 1]; 3014 ; V[30] = -V[ 2]; 3015 ; V[29] = -V[ 3]; 3016 ; V[28] = -V[ 4]; 3017 ; V[27] = -V[ 5]; 3018 ; V[26] = -V[ 6]; 3019 ; V[25] = -V[ 7]; 3020 ; V[24] = -V[ 8]; 3021 ; V[23] = -V[ 9]; 3022 ; V[22] = -V[10]; 3023 ; V[21] = -V[11]; 3024 ; V[20] = -V[12]; 3025 ; V[19] = -V[13]; 3026 ; V[18] = -V[14]; 3027 ; V[17] = -V[15]; 3028 ; V[63] = V[33]; 3029 ; V[62] = V[34]; 3030 ; V[61] = V[35]; 3031 ; V[60] = V[36]; 3032 ; V[59] = V[37]; 3033 ; V[58] = V[38]; 3034 ; V[57] = V[39]; 3035 ; V[56] = V[40]; 3036 ; V[55] = V[41]; 3037 ; V[54] = V[42]; 3038 ; V[53] = V[43]; 3039 ; V[52] = V[44]; 3040 ; V[51] = V[45]; 3041 ; V[50] = V[46]; 3042 ; V[49] = V[47]; 3043 3044 00000F84 0F283D[00090000] movaps xmm7, [negativ] 3045 00000F8B 0F2802 movaps xmm0, [edx+ 0*4] 3046 00000F8E 0F57C7 xorps xmm0, xmm7 3047 00000F91 0F284A10 movaps xmm1, [edx+ 4*4] 3048 00000F95 0F57CF xorps xmm1, xmm7 3049 00000F98 0F285220 movaps xmm2, [edx+ 8*4] 3050 00000F9C 0F57D7 xorps xmm2, xmm7 3051 00000F9F 0F285A30 movaps xmm3, [edx+12*4] 3052 00000FA3 0F57DF xorps xmm3, xmm7 3053 00000FA6 0FC6C01B shufps xmm0, xmm0, 0x1B ; 4#0123# = 0x1B 3054 00000FAA 0FC6C91B shufps xmm1, xmm1, 0x1B 3055 00000FAE 0FC6D21B shufps xmm2, xmm2, 0x1B 3056 00000FB2 
0FC6DB1B shufps xmm3, xmm3, 0x1B 3057 00000FB6 0F10A2B4000000 movups xmm4, [edx+45*4] 3058 00000FBD 0FC6E41B shufps xmm4, xmm4, 0x1B ; 4#0123# = 0x1B 3059 00000FC1 0F10AAA4000000 movups xmm5, [edx+41*4] 3060 00000FC8 0FC6ED1B shufps xmm5, xmm5, 0x1B 3061 00000FCC 0F10B294000000 movups xmm6, [edx+37*4] 3062 00000FD3 0FC6F61B shufps xmm6, xmm6, 0x1B 3063 00000FD7 0F10BA84000000 movups xmm7, [edx+33*4] 3064 00000FDE 0FC6FF1B shufps xmm7, xmm7, 0x1B 3065 3066 00000FE2 0F114274 movups [edx+29*4], xmm0 3067 00000FE6 0F114A64 movups [edx+25*4], xmm1 3068 00000FEA 0F115254 movups [edx+21*4], xmm2 3069 00000FEE 0F115A44 movups [edx+17*4], xmm3 3070 00000FF2 0F29A2C0000000 movaps [edx+48*4], xmm4 3071 00000FF9 0F29AAD0000000 movaps [edx+52*4], xmm5 3072 00001000 0F29B2E0000000 movaps [edx+56*4], xmm6 3073 00001007 0F29BAF0000000 movaps [edx+60*4], xmm7 3074 3075 endproc 3076 <1> %ifnctx proc 3077 <1> %error expected 'proc' before 'endproc'. 3078 <1> %else 3079 <1> %if %$STACK > 0 3080 <1> add esp, %$STACK 3081 <1> %endif 3082 <1> 3083 <1> %if %$STACK <> (-%$STACKN) 3084 <1> %error STACKLEVEL mismatch check 'local', 'alloc', 'pushd', 'popd' 3085 <1> %endif 3086 <1> 3087 0000100E C3 <1> ret 3088 <1> %pop 3089 <1> %endif 3090 3091 3092 ; . . . . . . . . . . . . . . . - 3093 ; + . . . . . . . . . . . . . . . 3094 ; . . . . . . . . . . . - . . . . 3095 ; . . . . + . . . . . . - . . . . 3096 ; . . . . . . . . . . . . . - . . 3097 ; . . + . . . . . . - . . . . . . 3098 ; . . . . . . . . . - . . . - . . 3099 ; . . + . . . + . . - . . . - . . 3100 ; . . . . . . . . . . . . . . - . 3101 ; . + . . . . . . - . . . . . . . 3102 ; . . . . . . . . . . - . - . . . 3103 ; . . . + . + . . . . - . - . . . 3104 ; . . . . . . . . . . . . - . - . 3105 ; . + . + . . . . - . - . . . . . 3106 ; . . . . . . . . - . - . - . - . 3107 ; . + . + . + . + - . - . - . - . 3108 3109 3110 ; . . . . . . . . . . . . . . . - 3111 ; + . . . . . . . - . . . . . . . 3112 ; . . . . . . . . . . . - - . . . 3113 ; . . 
. + + . . . . . . - - . . . 3114 ; . . . . . . . . . . . . . - - . 3115 ; . + + . . . . . . - - . . . . . 3116 ; . . . . . . . . . - - . . - - . 3117 ; . + + . . + + . . - - . . - - . 3118 ; . . . . . . . . . . . . . . - - 3119 ; + + . . . . . . - - . . . . . . 3120 ; . . . . . . . . . . - - - - . . 3121 ; . . + + + + . . . . - - - - . . 3122 ; . . . . . . . . . . . . - - - - 3123 ; + + + + . . . . - - - - . . . . 3124 ; . . . . . . . . - - - - - - - - 3125 ; + + + + + + + + - - - - - - - - 3126 3127 3128 ;**************************************************************************** 3129 ; Reset_FPU_3DNow() -- takes no arguments; executes femms (the 3DNow! instruction that leaves MMX state) and returns. Touches no general-purpose register. 3130 0000100F 90 align 4 3131 proc Reset_FPU_3DNow 3132 <1> %push proc 3133 <1> global _%1 3134 <1> global %1 3135 <1> _%1: 3136 <1> %1: 3137 <1> %assign %$STACK 0 3138 <1> %assign %$STACKN 0 3139 <1> %assign %$ARG 4 3140 00001010 0F0E femms 3141 endproc 3142 <1> %ifnctx proc 3143 <1> %error expected 'proc' before 'endproc'. 3144 <1> %else 3145 <1> %if %$STACK > 0 3146 <1> add esp, %$STACK 3147 <1> %endif 3148 <1> 3149 <1> %if %$STACK <> (-%$STACKN) 3150 <1> %error STACKLEVEL mismatch check 'local', 'alloc', 'pushd', 'popd' 3151 <1> %endif 3152 <1> 3153 00001012 C3 <1> ret 3154 <1> %pop 3155 <1> %endif 3156 3157 ;***************************************************************************** 3158 ; Reset_FPU() -- takes no arguments; executes emms (leaves MMX state so x87 code can run again) and returns. Touches no general-purpose register. 3159 00001013 90 align 4 3160 proc Reset_FPU 3161 <1> %push proc 3162 <1> global _%1 3163 <1> global %1 3164 <1> _%1: 3165 <1> %1: 3166 <1> %assign %$STACK 0 3167 <1> %assign %$STACKN 0 3168 <1> %assign %$ARG 4 3169 00001014 0F77 emms 3170 endproc 3171 <1> %ifnctx proc 3172 <1> %error expected 'proc' before 'endproc'. 
3173 <1> %else 3174 <1> %if %$STACK > 0 3175 <1> add esp, %$STACK 3176 <1> %endif 3177 <1> 3178 <1> %if %$STACK <> (-%$STACKN) 3179 <1> %error STACKLEVEL mismatch check 'local', 'alloc', 'pushd', 'popd' 3180 <1> %endif 3181 <1> 3182 00001016 C3 <1> ret 3183 <1> %pop 3184 <1> %endif 3185 3186 ;****************************************************************************** 3187 ; memcpy_dn_MMX(dst, src, words) -- stack arguments at [esp+4], [esp+8] and [esp+12]. ; Copies `words` blocks of 64 bytes from src to dst through mm0-mm7, starting with the highest-addressed block and stepping both pointers down by 64 per iteration; each block is fully loaded before it is stored. ; NOTE(review): words == 0 is not checked -- dec ecx would wrap and the loop would keep running, so callers must pass words >= 1. ; No emms/femms is executed here (see Reset_FPU / Reset_FPU_3DNow). ; The 5 nops ahead of the entry point place the loop label lbl3 at offset 0x1040, a multiple of 32. Overwrites eax, ecx, edx, mm0-mm7 and the flags. 3188 00001017 90 align 32 3189 00001020 90 times 5 nop 3190 proc memcpy_dn_MMX 3191 <1> %push proc 3192 <1> global _%1 3193 <1> global %1 3194 <1> _%1: 3195 <1> %1: 3196 <1> %assign %$STACK 0 3197 <1> %assign %$STACKN 0 3198 <1> %assign %$ARG 4 3199 $dst1 arg 4 3200 <1> $dst1 equ %$ARG 3201 <1> %assign %$ARG %$ARG+%1 3202 $src1 arg 4 3203 <1> $src1 equ %$ARG 3204 <1> %assign %$ARG %$ARG+%1 3205 $words1 arg 4 3206 <1> $words1 equ %$ARG 3207 <1> %assign %$ARG %$ARG+%1 3208 00001025 8B442404 mov eax, [sp($dst1)] 3209 00001029 8B542408 mov edx, [sp($src1)] 3210 0000102D 8B4C240C mov ecx, [sp($words1)] 3211 00001031 C1E106 shl ecx, 6 3212 00001034 8D540AC0 lea edx, [edx+ecx-64] 3213 00001038 8D4408C0 lea eax, [eax+ecx-64] 3214 0000103C 8B4C240C mov ecx, [sp($words1)] 3215 lbl3: 3216 00001040 0F6F02 pmov mm0, qword [edx+ 0] 3217 00001043 0F6F4A08 pmov mm1, qword [edx+ 8] 3218 00001047 0F6F5210 pmov mm2, qword [edx+16] 3219 0000104B 0F6F5A18 pmov mm3, qword [edx+24] 3220 0000104F 0F6F6220 pmov mm4, qword [edx+32] 3221 00001053 0F6F6A28 pmov mm5, qword [edx+40] 3222 00001057 0F6F7230 pmov mm6, qword [edx+48] 3223 0000105B 0F6F7A38 pmov mm7, qword [edx+56] 3224 0000105F 83C2C0 add edx, byte -64 3225 3226 00001062 0F7F00 pmov qword [eax+ 0], mm0 3227 00001065 0F7F4808 pmov qword [eax+ 8], mm1 3228 00001069 0F7F5010 pmov qword [eax+16], mm2 3229 0000106D 0F7F5818 pmov qword [eax+24], mm3 3230 00001071 0F7F6020 pmov qword [eax+32], mm4 3231 00001075 0F7F6828 pmov qword [eax+40], mm5 3232 00001079 0F7F7030 pmov qword [eax+48], mm6 3233 0000107D 0F7F7838 pmov qword [eax+56], mm7 3234 00001081 83C0C0 add eax, 
byte -64 3235 3236 00001084 49 dec ecx 3237 00001085 75B9 jnz short lbl3 3238 endproc 3239 <1> %ifnctx proc 3240 <1> %error expected 'proc' before 'endproc'. 3241 <1> %else 3242 <1> %if %$STACK > 0 3243 <1> add esp, %$STACK 3244 <1> %endif 3245 <1> 3246 <1> %if %$STACK <> (-%$STACKN) 3247 <1> %error STACKLEVEL mismatch check 'local', 'alloc', 'pushd', 'popd' 3248 <1> %endif 3249 <1> 3250 00001087 C3 <1> ret 3251 <1> %pop 3252 <1> %endif 3253 3254 ;******************************************************************************** 3255 3256 00001088 90 align 32 3257 000010A0 90 times 5 nop 3258 proc memcpy_dn_SIMD 3259 <1> %push proc 3260 <1> global _%1 3261 <1> global %1 3262 <1> _%1: 3263 <1> %1: 3264 <1> %assign %$STACK 0 3265 <1> %assign %$STACKN 0 3266 <1> %assign %$ARG 4 3267 $dst2 arg 4 3268 <1> $dst2 equ %$ARG 3269 <1> %assign %$ARG %$ARG+%1 3270 $src2 arg 4 3271 <1> $src2 equ %$ARG 3272 <1> %assign %$ARG %$ARG+%1 3273 $words2 arg 4 3274 <1> $words2 equ %$ARG 3275 <1> %assign %$ARG %$ARG+%1 3276 000010A5 8B442404 mov eax, [sp($dst2)] 3277 000010A9 8B542408 mov edx, [sp($src2)] 3278 000010AD 8B4C240C mov ecx, [sp($words2)] 3279 000010B1 C1E107 shl ecx, 7 3280 000010B4 8D540A80 lea edx, [edx+ecx-128] 3281 000010B8 8D440880 lea eax, [eax+ecx-128] 3282 000010BC 8B4C240C mov ecx, [sp($words2)] 3283 lbl4: 3284 000010C0 0F2802 movaps xmm0, [edx+ 0] 3285 000010C3 0F284A10 movaps xmm1, [edx+ 16] 3286 000010C7 0F285220 movaps xmm2, [edx+ 32] 3287 000010CB 0F285A30 movaps xmm3, [edx+ 48] 3288 000010CF 0F286240 movaps xmm4, [edx+ 64] 3289 000010D3 0F286A50 movaps xmm5, [edx+ 80] 3290 000010D7 0F287260 movaps xmm6, [edx+ 96] 3291 000010DB 0F287A70 movaps xmm7, [edx+112] 3292 000010DF 83C280 add edx, byte -128 3293 3294 000010E2 0F2900 movaps [eax+ 0], xmm0 3295 000010E5 0F294810 movaps [eax+ 16], xmm1 3296 000010E9 0F295020 movaps [eax+ 32], xmm2 3297 000010ED 0F295830 movaps [eax+ 48], xmm3 3298 000010F1 0F296040 movaps [eax+ 64], xmm4 3299 000010F5 0F296850 movaps [eax+ 
80], xmm5 3300 000010F9 0F297060 movaps [eax+ 96], xmm6 3301 000010FD 0F297870 movaps [eax+112], xmm7 3302 00001101 83C080 add eax, byte -128 3303 3304 00001104 49 dec ecx 3305 00001105 75B9 jnz short lbl4 3306 endproc 3307 <1> %ifnctx proc 3308 <1> %error expected 'proc' before 'endproc'. 3309 <1> %else 3310 <1> %if %$STACK > 0 3311 <1> add esp, %$STACK 3312 <1> %endif 3313 <1> 3314 <1> %if %$STACK <> (-%$STACKN) 3315 <1> %error STACKLEVEL mismatch check 'local', 'alloc', 'pushd', 'popd' 3316 <1> %endif 3317 <1> 3318 00001107 C3 <1> ret 3319 <1> %pop 3320 <1> %endif 3321 3322 3323 ;################################################################################################################## 3324 3325 3326 00001108 90 align 32 3327 proc Calculate_New_V_i387 3328 <1> %push proc 3329 <1> global _%1 3330 <1> global %1 3331 <1> _%1: 3332 <1> %1: 3333 <1> %assign %$STACK 0 3334 <1> %assign %$STACKN 0 3335 <1> %assign %$ARG 4 3336 $S9 arg 4 3337 <1> $S9 equ %$ARG 3338 <1> %assign %$ARG %$ARG+%1 3339 $V9 arg 4 3340 <1> $V9 equ %$ARG 3341 <1> %assign %$ARG %$ARG+%1 3342 00001120 8B4C2404 mov ecx, [sp($S9)] 3343 00001124 8B542408 mov edx, [sp($V9)] 3344 00001128 83EA80 sub edx, byte -128 3345 0000112B 55 push ebp 3346 0000112C B8[60080000] mov eax, C 3347 00001131 83C480 add esp, byte -128 3348 00001134 89E5 mov ebp, esp 3349 3350 %macro op1 2 3351 fld _S(%1) ; S00 3352 fadd _S(31-%1) ; A00 3353 fld _S(15-%1) ; S15 A00 3354 fadd _S(16+%1) ; A08 A00 3355 fld st1 ; A00 A08 A00 3356 fsub st0, st1 ; A00-A08 A08 A00 3357 fmul %2 ; B08 A08 A00 3358 fxch st2 ; A00 A08 B08 3359 faddp st1 ; B00 B08 3360 %endmacro 3361 3362 %macro opx 2 ; B04 B12 B00 B08 3363 fld st2 ; B00 B04 B12 B00 B08 3364 fsub st0, st1 ; B00-B04 B04 B12 B00 B08 3365 fmul %2 ; A04 B04 B12 B00 B08 3366 fstp _A(%1+4) ; B04 B12 B00 B08 3367 fld st3 ; B08 B04 B12 B00 B08 3368 fsub st0, st2 ; B08-B12 B04 B12 B00 B08 3369 fmul %2 ; A12 B04 B12 B00 B08 3370 fstp _A(%1+12) ; B04 B12 B00 B08 3371 faddp st2 ; B12 A00 
B08 3372 faddp st2 ; A00 A08 3373 fstp _A(%1) ; A08 3374 fstp _A(%1+8) ; 3375 %endmacro 3376 3377 ; A00 = Sample[ 0] + Sample[31]; 3378 ; A08 = Sample[15] + Sample[16]; 3379 ; B00 = A00 + A08; 3380 ; B08 = (A00 - A08) * C[ 2]; 3381 ; A04 = Sample[ 7] + Sample[24]; 3382 ; A12 = Sample[ 8] + Sample[23]; 3383 ; B04 = A04 + A12; 3384 ; B12 = (A04 - A12) * C[30]; 3385 3386 op1 0, _C02 3387 00001136 D901 <1> fld _S(%1) 3388 00001138 D8417C <1> fadd _S(31-%1) 3389 0000113B D9413C <1> fld _S(15-%1) 3390 0000113E D84140 <1> fadd _S(16+%1) 3391 00001141 D9C1 <1> fld st1 3392 00001143 D8E1 <1> fsub st0, st1 3393 00001145 D84820 <1> fmul %2 3394 00001148 D9CA <1> fxch st2 3395 0000114A DEC1 <1> faddp st1 3396 op1 7, _C30 3397 0000114C D9411C <1> fld _S(%1) 3398 0000114F D84160 <1> fadd _S(31-%1) 3399 00001152 D94120 <1> fld _S(15-%1) 3400 00001155 D8415C <1> fadd _S(16+%1) 3401 00001158 D9C1 <1> fld st1 3402 0000115A D8E1 <1> fsub st0, st1 3403 0000115C D84830 <1> fmul %2 3404 0000115F D9CA <1> fxch st2 3405 00001161 DEC1 <1> faddp st1 3406 3407 ; A00 = B00 + B04; 3408 ; A04 = (B00 - B04) * C[ 4]; 3409 ; A08 = B08 + B12; 3410 ; A12 = (B08 - B12) * C[ 4]; 3411 3412 opx 0, _C04 3413 00001163 D9C2 <1> fld st2 3414 00001165 D8E1 <1> fsub st0, st1 3415 00001167 D84810 <1> fmul %2 3416 0000116A D95D10 <1> fstp _A(%1+4) 3417 0000116D D9C3 <1> fld st3 3418 0000116F D8E2 <1> fsub st0, st2 3419 00001171 D84810 <1> fmul %2 3420 00001174 D95D30 <1> fstp _A(%1+12) 3421 00001177 DEC2 <1> faddp st2 3422 00001179 DEC2 <1> faddp st2 3423 0000117B D95D00 <1> fstp _A(%1) 3424 0000117E D95D20 <1> fstp _A(%1+8) 3425 3426 ; A01 = Sample[ 1] + Sample[30]; 3427 ; A09 = Sample[14] + Sample[17]; 3428 ; B01 = A01 + A09; 3429 ; B09 = (A01 - A09) * C[ 6]; 3430 ; A05 = Sample[ 6] + Sample[25]; 3431 ; A13 = Sample[ 9] + Sample[22]; 3432 ; B05 = A05 + A13; 3433 ; B13 = (A05 - A13) * C[26]; 3434 3435 op1 1, _C06 3436 00001181 D94104 <1> fld _S(%1) 3437 00001184 D84178 <1> fadd _S(31-%1) 3438 00001187 D94138 
<1> fld _S(15-%1) 3439 0000118A D84144 <1> fadd _S(16+%1) 3440 0000118D D9C1 <1> fld st1 3441 0000118F D8E1 <1> fsub st0, st1 3442 00001191 D84824 <1> fmul %2 3443 00001194 D9CA <1> fxch st2 3444 00001196 DEC1 <1> faddp st1 3445 op1 6, _C26 3446 00001198 D94118 <1> fld _S(%1) 3447 0000119B D84164 <1> fadd _S(31-%1) 3448 0000119E D94124 <1> fld _S(15-%1) 3449 000011A1 D84158 <1> fadd _S(16+%1) 3450 000011A4 D9C1 <1> fld st1 3451 000011A6 D8E1 <1> fsub st0, st1 3452 000011A8 D84834 <1> fmul %2 3453 000011AB D9CA <1> fxch st2 3454 000011AD DEC1 <1> faddp st1 3455 3456 ; A01 = B01 + B05; 3457 ; A05 = (B01 - B05) * C[12]; 3458 ; A09 = B09 + B13; 3459 ; A13 = (B09 - B13) * C[12]; 3460 3461 opx 1, _C12 3462 000011AF D9C2 <1> fld st2 3463 000011B1 D8E1 <1> fsub st0, st1 3464 000011B3 D84814 <1> fmul %2 3465 000011B6 D95D14 <1> fstp _A(%1+4) 3466 000011B9 D9C3 <1> fld st3 3467 000011BB D8E2 <1> fsub st0, st2 3468 000011BD D84814 <1> fmul %2 3469 000011C0 D95D34 <1> fstp _A(%1+12) 3470 000011C3 DEC2 <1> faddp st2 3471 000011C5 DEC2 <1> faddp st2 3472 000011C7 D95D04 <1> fstp _A(%1) 3473 000011CA D95D24 <1> fstp _A(%1+8) 3474 3475 ; A02 = Sample[ 3] + Sample[28]; 3476 ; A10 = Sample[12] + Sample[19]; 3477 ; B02 = A02 + A10; 3478 ; B10 = (A02 - A10) * C[14]; 3479 ; A06 = Sample[ 4] + Sample[27]; 3480 ; A14 = Sample[11] + Sample[20]; 3481 ; B06 = A06 + A14; 3482 ; B14 = (A06 - A14) * C[18]; 3483 3484 op1 3, _C14 3485 000011CD D9410C <1> fld _S(%1) 3486 000011D0 D84170 <1> fadd _S(31-%1) 3487 000011D3 D94130 <1> fld _S(15-%1) 3488 000011D6 D8414C <1> fadd _S(16+%1) 3489 000011D9 D9C1 <1> fld st1 3490 000011DB D8E1 <1> fsub st0, st1 3491 000011DD D84828 <1> fmul %2 3492 000011E0 D9CA <1> fxch st2 3493 000011E2 DEC1 <1> faddp st1 3494 op1 4, _C18 3495 000011E4 D94110 <1> fld _S(%1) 3496 000011E7 D8416C <1> fadd _S(31-%1) 3497 000011EA D9412C <1> fld _S(15-%1) 3498 000011ED D84150 <1> fadd _S(16+%1) 3499 000011F0 D9C1 <1> fld st1 3500 000011F2 D8E1 <1> fsub st0, st1 3501 000011F4 
D84838 <1> fmul %2 3502 000011F7 D9CA <1> fxch st2 3503 000011F9 DEC1 <1> faddp st1 3504 3505 ; A02 = B02 + B06; 3506 ; A06 = (B02 - B06) * C[28]; 3507 ; A10 = B10 + B14; 3508 ; A14 = (B10 - B14) * C[28]; 3509 3510 opx 2, _C28 3511 000011FB D9C2 <1> fld st2 3512 000011FD D8E1 <1> fsub st0, st1 3513 000011FF D84818 <1> fmul %2 3514 00001202 D95D18 <1> fstp _A(%1+4) 3515 00001205 D9C3 <1> fld st3 3516 00001207 D8E2 <1> fsub st0, st2 3517 00001209 D84818 <1> fmul %2 3518 0000120C D95D38 <1> fstp _A(%1+12) 3519 0000120F DEC2 <1> faddp st2 3520 00001211 DEC2 <1> faddp st2 3521 00001213 D95D08 <1> fstp _A(%1) 3522 00001216 D95D28 <1> fstp _A(%1+8) 3523 3524 ; A03 = Sample[ 2] + Sample[29]; 3525 ; A11 = Sample[13] + Sample[18]; 3526 ; B03 = A03 + A11; 3527 ; B11 = (A03 - A11) * C[10]; 3528 ; A07 = Sample[ 5] + Sample[26]; 3529 ; A15 = Sample[10] + Sample[21]; 3530 ; B07 = A07 + A15; 3531 ; B15 = (A07 - A15) * C[22]; 3532 3533 op1 2, _C10 3534 00001219 D94108 <1> fld _S(%1) 3535 0000121C D84174 <1> fadd _S(31-%1) 3536 0000121F D94134 <1> fld _S(15-%1) 3537 00001222 D84148 <1> fadd _S(16+%1) 3538 00001225 D9C1 <1> fld st1 3539 00001227 D8E1 <1> fsub st0, st1 3540 00001229 D8482C <1> fmul %2 3541 0000122C D9CA <1> fxch st2 3542 0000122E DEC1 <1> faddp st1 3543 op1 5, _C22 3544 00001230 D94114 <1> fld _S(%1) 3545 00001233 D84168 <1> fadd _S(31-%1) 3546 00001236 D94128 <1> fld _S(15-%1) 3547 00001239 D84154 <1> fadd _S(16+%1) 3548 0000123C D9C1 <1> fld st1 3549 0000123E D8E1 <1> fsub st0, st1 3550 00001240 D8483C <1> fmul %2 3551 00001243 D9CA <1> fxch st2 3552 00001245 DEC1 <1> faddp st1 3553 3554 ; A03 = B03 + B07; 3555 ; A07 = (B03 - B07) * C[20]; 3556 ; A11 = B11 + B15; 3557 ; A15 = (B11 - B15) * C[20]; 3558 3559 opx 3, _C20 3560 00001247 D9C2 <1> fld st2 3561 00001249 D8E1 <1> fsub st0, st1 3562 0000124B D8481C <1> fmul %2 3563 0000124E D95D1C <1> fstp _A(%1+4) 3564 00001251 D9C3 <1> fld st3 3565 00001253 D8E2 <1> fsub st0, st2 3566 00001255 D8481C <1> fmul %2 3567 
00001258 D95D3C <1> fstp _A(%1+12) 3568 0000125B DEC2 <1> faddp st2 3569 0000125D DEC2 <1> faddp st2 3570 0000125F D95D0C <1> fstp _A(%1) 3571 00001262 D95D2C <1> fstp _A(%1+8) 3572 3573 3574 %macro op2 1 3575 fld _A(%1) ; A00 3576 fadd _A(%1+2) ; B00 3577 fld _A(%1+1) ; A01 B00 3578 fadd _A(%1+3) ; B01 B00 3579 fld st1 ; B00 B01 B00 3580 fsub st0, st1 ; B00-B01 B01 B00 3581 fmul _C16 ; A01 B01 B00 3582 fstp _B(%1+1) ; B01 B00 3583 faddp st1 ; A00 3584 fstp _B(%1) ; 3585 fld _A(%1) ; A00 3586 fsub _A(%1+2) ; A00-A02 3587 fmul _C08 ; B02 3588 fld _A(%1+1) ; A01 B02 3589 fsub _A(%1+3) ; A01-A03 B02 3590 fmul _C24 ; B03 B02 3591 fld st1 ; B02 B03 B02 3592 fsub st0, st1 ; B02-B03 B03 B02 3593 fmul _C16 ; A03 B03 B02 3594 fstp _B(%1+3) ; B03 B02 3595 faddp st1 ; A02 3596 fstp _B(%1+2) ; 3597 %endmacro 3598 3599 ; B00 = A00 + A02; 3600 ; B01 = A01 + A03; 3601 ; A00 = B00 + B01; 3602 ; A01 = (B00 - B01) * C[16]; 3603 ; B02 = (A00 - A02) * C[ 8]; 3604 ; B03 = (A01 - A03) * C[24]; 3605 ; A02 = B02 + B03; 3606 ; A03 = (B02 - B03) * C[16]; 3607 3608 op2 0 3609 00001265 D94500 <1> fld _A(%1) 3610 00001268 D84508 <1> fadd _A(%1+2) 3611 0000126B D94504 <1> fld _A(%1+1) 3612 0000126E D8450C <1> fadd _A(%1+3) 3613 00001271 D9C1 <1> fld st1 3614 00001273 D8E1 <1> fsub st0, st1 3615 00001275 D808 <1> fmul _C16 3616 00001277 D95D44 <1> fstp _B(%1+1) 3617 0000127A DEC1 <1> faddp st1 3618 0000127C D95D40 <1> fstp _B(%1) 3619 0000127F D94500 <1> fld _A(%1) 3620 00001282 D86508 <1> fsub _A(%1+2) 3621 00001285 D84808 <1> fmul _C08 3622 00001288 D94504 <1> fld _A(%1+1) 3623 0000128B D8650C <1> fsub _A(%1+3) 3624 0000128E D8480C <1> fmul _C24 3625 00001291 D9C1 <1> fld st1 3626 00001293 D8E1 <1> fsub st0, st1 3627 00001295 D808 <1> fmul _C16 3628 00001297 D95D4C <1> fstp _B(%1+3) 3629 0000129A DEC1 <1> faddp st1 3630 0000129C D95D48 <1> fstp _B(%1+2) 3631 3632 ; B04 = A04 + A06; 3633 ; B05 = A05 + A07; 3634 ; A04 = B04 + B05; 3635 ; A05 = (B04 - B05) * C[16]; 3636 ; B06 = (A04 - A06) * C[ 
8]; 3637 ; B07 = (A05 - A07) * C[24]; 3638 ; A06 = B06 + B07; 3639 ; A07 = (B06 - B07) * C[16]; 3640 3641 op2 4 3642 0000129F D94510 <1> fld _A(%1) 3643 000012A2 D84518 <1> fadd _A(%1+2) 3644 000012A5 D94514 <1> fld _A(%1+1) 3645 000012A8 D8451C <1> fadd _A(%1+3) 3646 000012AB D9C1 <1> fld st1 3647 000012AD D8E1 <1> fsub st0, st1 3648 000012AF D808 <1> fmul _C16 3649 000012B1 D95D54 <1> fstp _B(%1+1) 3650 000012B4 DEC1 <1> faddp st1 3651 000012B6 D95D50 <1> fstp _B(%1) 3652 000012B9 D94510 <1> fld _A(%1) 3653 000012BC D86518 <1> fsub _A(%1+2) 3654 000012BF D84808 <1> fmul _C08 3655 000012C2 D94514 <1> fld _A(%1+1) 3656 000012C5 D8651C <1> fsub _A(%1+3) 3657 000012C8 D8480C <1> fmul _C24 3658 000012CB D9C1 <1> fld st1 3659 000012CD D8E1 <1> fsub st0, st1 3660 000012CF D808 <1> fmul _C16 3661 000012D1 D95D5C <1> fstp _B(%1+3) 3662 000012D4 DEC1 <1> faddp st1 3663 000012D6 D95D58 <1> fstp _B(%1+2) 3664 3665 ; B08 = A08 + A10; 3666 ; B09 = A09 + A11; 3667 ; A08 = B08 + B09; 3668 ; A09 = (B08 - B09) * C[16]; 3669 ; B10 = (A08 - A10) * C[ 8]; 3670 ; B11 = (A09 - A11) * C[24]; 3671 ; A10 = B10 + B11; 3672 ; A11 = (B10 - B11) * C[16]; 3673 3674 op2 8 3675 000012D9 D94520 <1> fld _A(%1) 3676 000012DC D84528 <1> fadd _A(%1+2) 3677 000012DF D94524 <1> fld _A(%1+1) 3678 000012E2 D8452C <1> fadd _A(%1+3) 3679 000012E5 D9C1 <1> fld st1 3680 000012E7 D8E1 <1> fsub st0, st1 3681 000012E9 D808 <1> fmul _C16 3682 000012EB D95D64 <1> fstp _B(%1+1) 3683 000012EE DEC1 <1> faddp st1 3684 000012F0 D95D60 <1> fstp _B(%1) 3685 000012F3 D94520 <1> fld _A(%1) 3686 000012F6 D86528 <1> fsub _A(%1+2) 3687 000012F9 D84808 <1> fmul _C08 3688 000012FC D94524 <1> fld _A(%1+1) 3689 000012FF D8652C <1> fsub _A(%1+3) 3690 00001302 D8480C <1> fmul _C24 3691 00001305 D9C1 <1> fld st1 3692 00001307 D8E1 <1> fsub st0, st1 3693 00001309 D808 <1> fmul _C16 3694 0000130B D95D6C <1> fstp _B(%1+3) 3695 0000130E DEC1 <1> faddp st1 3696 00001310 D95D68 <1> fstp _B(%1+2) 3697 3698 ; B12 = A12 + A14; 3699 ; B13 = 
A13 + A15; 3700 ; A12 = B12 + B13; 3701 ; A13 = (B12 - B13) * C[16]; 3702 ; B14 = (A12 - A14) * C[ 8]; 3703 ; B15 = (A13 - A15) * C[24]; 3704 ; A14 = B14 + B15; 3705 ; A15 = (B14 - B15) * C[16]; 3706 3707 op2 12 3708 00001313 D94530 <1> fld _A(%1) 3709 00001316 D84538 <1> fadd _A(%1+2) 3710 00001319 D94534 <1> fld _A(%1+1) 3711 0000131C D8453C <1> fadd _A(%1+3) 3712 0000131F D9C1 <1> fld st1 3713 00001321 D8E1 <1> fsub st0, st1 3714 00001323 D808 <1> fmul _C16 3715 00001325 D95D74 <1> fstp _B(%1+1) 3716 00001328 DEC1 <1> faddp st1 3717 0000132A D95D70 <1> fstp _B(%1) 3718 0000132D D94530 <1> fld _A(%1) 3719 00001330 D86538 <1> fsub _A(%1+2) 3720 00001333 D84808 <1> fmul _C08 3721 00001336 D94534 <1> fld _A(%1+1) 3722 00001339 D8653C <1> fsub _A(%1+3) 3723 0000133C D8480C <1> fmul _C24 3724 0000133F D9C1 <1> fld st1 3725 00001341 D8E1 <1> fsub st0, st1 3726 00001343 D808 <1> fmul _C16 3727 00001345 D95D7C <1> fstp _B(%1+3) 3728 00001348 DEC1 <1> faddp st1 3729 0000134A D95D78 <1> fstp _B(%1+2) 3730 3731 ; V[48] = -A00; 3732 ; V[ 0] = A01; 3733 ; V[40] = -A02 - (V[ 8] = A03); 3734 3735 0000134D D94540 fld _B(0) 3736 00001350 D9E0 fchs 3737 00001352 D95A40 fstp _V(48) 3738 00001355 D94544 fld _B(1) 3739 00001358 D95A80 fstp _V(0) 3740 0000135B D9454C fld _B(3) 3741 0000135E D952A0 fst _V(8) 3742 00001361 D84548 fadd _B(2) 3743 00001364 D9E0 fchs 3744 00001366 D95A20 fstp _V(40) 3745 3746 ; V[36] = -((V[ 4] = A05 + (V[12] = A07)) + A06); 3747 ; V[44] = - A04 - A06 - A07; 3748 3749 00001369 D9455C fld _B(7) 3750 0000136C D952B0 fst _V(12) 3751 0000136F D84554 fadd _B(5) 3752 00001372 D95290 fst _V(4) 3753 00001375 D84558 fadd _B(6) 3754 00001378 D9E0 fchs 3755 0000137A D95A10 fstp _V(36) 3756 0000137D D94550 fld _B(4) 3757 00001380 D84558 fadd _B(6) 3758 00001383 D8455C fadd _B(7) 3759 00001386 D9E0 fchs 3760 00001388 D95A30 fstp _V(44) 3761 3762 ; V[ 6] = (V[10] = A11 + (V[14] = A15)) + A13; 3763 ; V[38] = (V[34] = -(V[ 2] = A09 + A13 + A15) - A14) + A09 - A10 - A11; 
3764 3765 0000138B D9457C fld _B(15) 3766 0000138E D952B8 fst _V(14) 3767 00001391 D8456C fadd _B(11) 3768 00001394 D952A8 fst _V(10) 3769 00001397 D84574 fadd _B(13) 3770 0000139A D95A98 fstp _V(6) 3771 0000139D D94564 fld _B(9) 3772 000013A0 D84574 fadd _B(13) 3773 000013A3 D8457C fadd _B(15) 3774 000013A6 D95288 fst _V(2) 3775 000013A9 D84578 fadd _B(14) 3776 000013AC D9E0 fchs 3777 000013AE D95208 fst _V(34) 3778 000013B1 D84564 fadd _B(9) 3779 000013B4 D86568 fsub _B(10) 3780 000013B7 D8656C fsub _B(11) 3781 000013BA D95A18 fstp _V(38) 3782 3783 ; V[46] = (tmp = -(A12 + A14 + A15)) - A08; 3784 3785 000013BD D94570 fld _B(12) 3786 000013C0 D84578 fadd _B(14) 3787 000013C3 D8457C fadd _B(15) 3788 000013C6 D9E0 fchs 3789 000013C8 D9C0 fld st0 3790 000013CA D86560 fsub _B(8) 3791 000013CD D95A38 fstp _V(46) 3792 3793 ; V[42] = tmp - A10 - A11; // depends on the statement above (reuses tmp) 3794 3795 000013D0 D86568 fsub _B(10) 3796 000013D3 D8656C fsub _B(11) 3797 000013D6 D95A28 fstp _V(42) 3798 3799 3800 %macro op4 4 3801 fld _S(%1) ; S00 3802 fsub _S(31-%1) ; A00 3803 fmul %2 3804 fld _S(15-%1) ; S15 A00 3805 fsub _S(16+%1) ; A08 A00 3806 fmul %3 3807 fld st1 ; A00 A08 A00 3808 fsub st0, st1 ; A00-A08 A08 A00 3809 fmul %4 ; B08 A08 A00 3810 fxch st2 ; A00 A08 B08 3811 faddp st1 ; B00 B08 3812 %endmacro 3813 3814 ; A00 = (Sample[ 0] - Sample[31]) * C[ 1]; 3815 ; A08 = (Sample[15] - Sample[16]) * C[31]; 3816 ; B00 = A00 + A08; 3817 ; B08 = (A00 - A08) * C[ 2]; 3818 ; A04 = (Sample[ 7] - Sample[24]) * C[15]; 3819 ; A12 = (Sample[ 8] - Sample[23]) * C[17]; 3820 ; B04 = A04 + A12; 3821 ; B12 = (A04 - A12) * C[30]; 3822 3823 op4 0, _C01, _C31, _C02 3824 000013D9 D901 <1> fld _S(%1) 3825 000013DB D8617C <1> fsub _S(31-%1) 3826 000013DE D84840 <1> fmul %2 3827 000013E1 D9413C <1> fld _S(15-%1) 3828 000013E4 D86140 <1> fsub _S(16+%1) 3829 000013E7 D84860 <1> fmul %3 3830 000013EA D9C1 <1> fld st1 3831 000013EC D8E1 <1> fsub st0, st1 3832 000013EE D84820 <1> fmul %4 3833 000013F1 D9CA 
<1> fxch st2 3834 000013F3 DEC1 <1> faddp st1 3835 op4 7, _C15, _C17, _C30 3836 000013F5 D9411C <1> fld _S(%1) 3837 000013F8 D86160 <1> fsub _S(31-%1) 3838 000013FB D84850 <1> fmul %2 3839 000013FE D94120 <1> fld _S(15-%1) 3840 00001401 D8615C <1> fsub _S(16+%1) 3841 00001404 D84870 <1> fmul %3 3842 00001407 D9C1 <1> fld st1 3843 00001409 D8E1 <1> fsub st0, st1 3844 0000140B D84830 <1> fmul %4 3845 0000140E D9CA <1> fxch st2 3846 00001410 DEC1 <1> faddp st1 3847 3848 ; A00 = B00 + B04; 3849 ; A04 = (B00 - B04) * C[ 4]; 3850 ; A08 = B08 + B12; 3851 ; A12 = (B08 - B12) * C[ 4]; 3852 3853 opx 0, _C04 3854 00001412 D9C2 <1> fld st2 3855 00001414 D8E1 <1> fsub st0, st1 3856 00001416 D84810 <1> fmul %2 3857 00001419 D95D10 <1> fstp _A(%1+4) 3858 0000141C D9C3 <1> fld st3 3859 0000141E D8E2 <1> fsub st0, st2 3860 00001420 D84810 <1> fmul %2 3861 00001423 D95D30 <1> fstp _A(%1+12) 3862 00001426 DEC2 <1> faddp st2 3863 00001428 DEC2 <1> faddp st2 3864 0000142A D95D00 <1> fstp _A(%1) 3865 0000142D D95D20 <1> fstp _A(%1+8) 3866 3867 ; A01 = (Sample[ 1] - Sample[30]) * C[ 3]; 3868 ; A09 = (Sample[14] - Sample[17]) * C[29]; 3869 ; B01 = A01 + A09; 3870 ; B09 = (A01 - A09) * C[ 6]; 3871 ; A05 = (Sample[ 6] - Sample[25]) * C[13]; 3872 ; A13 = (Sample[ 9] - Sample[22]) * C[19]; 3873 ; B05 = A05 + A13; 3874 ; B13 = (A05 - A13) * C[26]; 3875 3876 op4 1, _C03, _C29, _C06 3877 00001430 D94104 <1> fld _S(%1) 3878 00001433 D86178 <1> fsub _S(31-%1) 3879 00001436 D84844 <1> fmul %2 3880 00001439 D94138 <1> fld _S(15-%1) 3881 0000143C D86144 <1> fsub _S(16+%1) 3882 0000143F D84864 <1> fmul %3 3883 00001442 D9C1 <1> fld st1 3884 00001444 D8E1 <1> fsub st0, st1 3885 00001446 D84824 <1> fmul %4 3886 00001449 D9CA <1> fxch st2 3887 0000144B DEC1 <1> faddp st1 3888 op4 6, _C13, _C19, _C26 3889 0000144D D94118 <1> fld _S(%1) 3890 00001450 D86164 <1> fsub _S(31-%1) 3891 00001453 D84854 <1> fmul %2 3892 00001456 D94124 <1> fld _S(15-%1) 3893 00001459 D86158 <1> fsub _S(16+%1) 3894 0000145C D84874 
<1> fmul %3 3895 0000145F D9C1 <1> fld st1 3896 00001461 D8E1 <1> fsub st0, st1 3897 00001463 D84834 <1> fmul %4 3898 00001466 D9CA <1> fxch st2 3899 00001468 DEC1 <1> faddp st1 3900 3901 ; A01 = B01 + B05; 3902 ; A05 = (B01 - B05) * C[12]; 3903 ; A09 = B09 + B13; 3904 ; A13 = (B09 - B13) * C[12]; 3905 3906 opx 1, _C12 3907 0000146A D9C2 <1> fld st2 3908 0000146C D8E1 <1> fsub st0, st1 3909 0000146E D84814 <1> fmul %2 3910 00001471 D95D14 <1> fstp _A(%1+4) 3911 00001474 D9C3 <1> fld st3 3912 00001476 D8E2 <1> fsub st0, st2 3913 00001478 D84814 <1> fmul %2 3914 0000147B D95D34 <1> fstp _A(%1+12) 3915 0000147E DEC2 <1> faddp st2 3916 00001480 DEC2 <1> faddp st2 3917 00001482 D95D04 <1> fstp _A(%1) 3918 00001485 D95D24 <1> fstp _A(%1+8) 3919 3920 ; A02 = (Sample[ 3] - Sample[28]) * C[ 7]; 3921 ; A10 = (Sample[12] - Sample[19]) * C[25]; 3922 ; B02 = A02 + A10; 3923 ; B10 = (A02 - A10) * C[14]; 3924 ; A06 = (Sample[ 4] - Sample[27]) * C[ 9]; 3925 ; A14 = (Sample[11] - Sample[20]) * C[23]; 3926 ; B06 = A06 + A14; 3927 ; B14 = (A06 - A14) * C[18]; 3928 3929 op4 3, _C07, _C25, _C14 3930 00001488 D9410C <1> fld _S(%1) 3931 0000148B D86170 <1> fsub _S(31-%1) 3932 0000148E D84848 <1> fmul %2 3933 00001491 D94130 <1> fld _S(15-%1) 3934 00001494 D8614C <1> fsub _S(16+%1) 3935 00001497 D84868 <1> fmul %3 3936 0000149A D9C1 <1> fld st1 3937 0000149C D8E1 <1> fsub st0, st1 3938 0000149E D84828 <1> fmul %4 3939 000014A1 D9CA <1> fxch st2 3940 000014A3 DEC1 <1> faddp st1 3941 op4 4, _C09, _C23, _C18 3942 000014A5 D94110 <1> fld _S(%1) 3943 000014A8 D8616C <1> fsub _S(31-%1) 3944 000014AB D84858 <1> fmul %2 3945 000014AE D9412C <1> fld _S(15-%1) 3946 000014B1 D86150 <1> fsub _S(16+%1) 3947 000014B4 D84878 <1> fmul %3 3948 000014B7 D9C1 <1> fld st1 3949 000014B9 D8E1 <1> fsub st0, st1 3950 000014BB D84838 <1> fmul %4 3951 000014BE D9CA <1> fxch st2 3952 000014C0 DEC1 <1> faddp st1 3953 3954 ; A02 = B02 + B06; 3955 ; A06 = (B02 - B06) * C[28]; 3956 ; A10 = B10 + B14; 3957 ; A14 = (B10 
- B14) * C[28]; 3958 3959 opx 2, _C28 3960 000014C2 D9C2 <1> fld st2 3961 000014C4 D8E1 <1> fsub st0, st1 3962 000014C6 D84818 <1> fmul %2 3963 000014C9 D95D18 <1> fstp _A(%1+4) 3964 000014CC D9C3 <1> fld st3 3965 000014CE D8E2 <1> fsub st0, st2 3966 000014D0 D84818 <1> fmul %2 3967 000014D3 D95D38 <1> fstp _A(%1+12) 3968 000014D6 DEC2 <1> faddp st2 3969 000014D8 DEC2 <1> faddp st2 3970 000014DA D95D08 <1> fstp _A(%1) 3971 000014DD D95D28 <1> fstp _A(%1+8) 3972 3973 ; A03 = (Sample[ 2] - Sample[29]) * C[ 5]; 3974 ; A11 = (Sample[13] - Sample[18]) * C[27]; 3975 ; B03 = A03 + A11; 3976 ; B11 = (A03 - A11) * C[10]; 3977 ; A07 = (Sample[ 5] - Sample[26]) * C[11]; 3978 ; A15 = (Sample[10] - Sample[21]) * C[21]; 3979 ; B07 = A07 + A15; 3980 ; B15 = (A07 - A15) * C[22]; 3981 3982 op4 2, _C05, _C27, _C10 3983 000014E0 D94108 <1> fld _S(%1) 3984 000014E3 D86174 <1> fsub _S(31-%1) 3985 000014E6 D8484C <1> fmul %2 3986 000014E9 D94134 <1> fld _S(15-%1) 3987 000014EC D86148 <1> fsub _S(16+%1) 3988 000014EF D8486C <1> fmul %3 3989 000014F2 D9C1 <1> fld st1 3990 000014F4 D8E1 <1> fsub st0, st1 3991 000014F6 D8482C <1> fmul %4 3992 000014F9 D9CA <1> fxch st2 3993 000014FB DEC1 <1> faddp st1 3994 op4 5, _C11, _C21, _C22 3995 000014FD D94114 <1> fld _S(%1) 3996 00001500 D86168 <1> fsub _S(31-%1) 3997 00001503 D8485C <1> fmul %2 3998 00001506 D94128 <1> fld _S(15-%1) 3999 00001509 D86154 <1> fsub _S(16+%1) 4000 0000150C D8487C <1> fmul %3 4001 0000150F D9C1 <1> fld st1 4002 00001511 D8E1 <1> fsub st0, st1 4003 00001513 D8483C <1> fmul %4 4004 00001516 D9CA <1> fxch st2 4005 00001518 DEC1 <1> faddp st1 4006 4007 ; A03 = B03 + B07; 4008 ; A07 = (B03 - B07) * C[20]; 4009 ; A11 = B11 + B15; 4010 ; A15 = (B11 - B15) * C[20]; 4011 4012 opx 3, _C20 4013 0000151A D9C2 <1> fld st2 4014 0000151C D8E1 <1> fsub st0, st1 4015 0000151E D8481C <1> fmul %2 4016 00001521 D95D1C <1> fstp _A(%1+4) 4017 00001524 D9C3 <1> fld st3 4018 00001526 D8E2 <1> fsub st0, st2 4019 00001528 D8481C <1> fmul %2 4020 
0000152B D95D3C <1> fstp _A(%1+12) 4021 0000152E DEC2 <1> faddp st2 4022 00001530 DEC2 <1> faddp st2 4023 00001532 D95D0C <1> fstp _A(%1) 4024 00001535 D95D2C <1> fstp _A(%1+8) 4025 4026 ; B00 = A00 + A02; 4027 ; B01 = A01 + A03; 4028 ; A00 = B00 + B01; 4029 ; A01 = (B00 - B01) * C[16]; 4030 ; B02 = (A00 - A02) * C[ 8]; 4031 ; B03 = (A01 - A03) * C[24]; 4032 ; A02 = B02 + B03; 4033 ; A03 = (B02 - B03) * C[16]; 4034 4035 op2 0 4036 00001538 D94500 <1> fld _A(%1) 4037 0000153B D84508 <1> fadd _A(%1+2) 4038 0000153E D94504 <1> fld _A(%1+1) 4039 00001541 D8450C <1> fadd _A(%1+3) 4040 00001544 D9C1 <1> fld st1 4041 00001546 D8E1 <1> fsub st0, st1 4042 00001548 D808 <1> fmul _C16 4043 0000154A D95D44 <1> fstp _B(%1+1) 4044 0000154D DEC1 <1> faddp st1 4045 0000154F D95D40 <1> fstp _B(%1) 4046 00001552 D94500 <1> fld _A(%1) 4047 00001555 D86508 <1> fsub _A(%1+2) 4048 00001558 D84808 <1> fmul _C08 4049 0000155B D94504 <1> fld _A(%1+1) 4050 0000155E D8650C <1> fsub _A(%1+3) 4051 00001561 D8480C <1> fmul _C24 4052 00001564 D9C1 <1> fld st1 4053 00001566 D8E1 <1> fsub st0, st1 4054 00001568 D808 <1> fmul _C16 4055 0000156A D95D4C <1> fstp _B(%1+3) 4056 0000156D DEC1 <1> faddp st1 4057 0000156F D95D48 <1> fstp _B(%1+2) 4058 4059 ; B04 = A04 + A06; 4060 ; B05 = A05 + A07; 4061 ; A04 = B04 + B05; 4062 ; A05 = (B04 - B05) * C[16]; 4063 ; B06 = (A04 - A06) * C[ 8]; 4064 ; B07 = (A05 - A07) * C[24]; 4065 ; A06 = B06 + B07; 4066 ; A07 = (B06 - B07) * C[16]; 4067 4068 op2 4 4069 00001572 D94510 <1> fld _A(%1) 4070 00001575 D84518 <1> fadd _A(%1+2) 4071 00001578 D94514 <1> fld _A(%1+1) 4072 0000157B D8451C <1> fadd _A(%1+3) 4073 0000157E D9C1 <1> fld st1 4074 00001580 D8E1 <1> fsub st0, st1 4075 00001582 D808 <1> fmul _C16 4076 00001584 D95D54 <1> fstp _B(%1+1) 4077 00001587 DEC1 <1> faddp st1 4078 00001589 D95D50 <1> fstp _B(%1) 4079 0000158C D94510 <1> fld _A(%1) 4080 0000158F D86518 <1> fsub _A(%1+2) 4081 00001592 D84808 <1> fmul _C08 4082 00001595 D94514 <1> fld _A(%1+1) 4083 
00001598 D8651C <1> fsub _A(%1+3) 4084 0000159B D8480C <1> fmul _C24 4085 0000159E D9C1 <1> fld st1 4086 000015A0 D8E1 <1> fsub st0, st1 4087 000015A2 D808 <1> fmul _C16 4088 000015A4 D95D5C <1> fstp _B(%1+3) 4089 000015A7 DEC1 <1> faddp st1 4090 000015A9 D95D58 <1> fstp _B(%1+2) 4091 4092 ; B08 = A08 + A10; 4093 ; B09 = A09 + A11; 4094 ; A08 = B08 + B09; 4095 ; A09 = (B08 - B09) * C[16]; 4096 ; B10 = (A08 - A10) * C[ 8]; 4097 ; B11 = (A09 - A11) * C[24]; 4098 ; A10 = B10 + B11; 4099 ; A11 = (B10 - B11) * C[16]; 4100 4101 op2 8 4102 000015AC D94520 <1> fld _A(%1) 4103 000015AF D84528 <1> fadd _A(%1+2) 4104 000015B2 D94524 <1> fld _A(%1+1) 4105 000015B5 D8452C <1> fadd _A(%1+3) 4106 000015B8 D9C1 <1> fld st1 4107 000015BA D8E1 <1> fsub st0, st1 4108 000015BC D808 <1> fmul _C16 4109 000015BE D95D64 <1> fstp _B(%1+1) 4110 000015C1 DEC1 <1> faddp st1 4111 000015C3 D95D60 <1> fstp _B(%1) 4112 000015C6 D94520 <1> fld _A(%1) 4113 000015C9 D86528 <1> fsub _A(%1+2) 4114 000015CC D84808 <1> fmul _C08 4115 000015CF D94524 <1> fld _A(%1+1) 4116 000015D2 D8652C <1> fsub _A(%1+3) 4117 000015D5 D8480C <1> fmul _C24 4118 000015D8 D9C1 <1> fld st1 4119 000015DA D8E1 <1> fsub st0, st1 4120 000015DC D808 <1> fmul _C16 4121 000015DE D95D6C <1> fstp _B(%1+3) 4122 000015E1 DEC1 <1> faddp st1 4123 000015E3 D95D68 <1> fstp _B(%1+2) 4124 4125 ; B12 = A12 + A14; 4126 ; B13 = A13 + A15; 4127 ; A12 = B12 + B13; 4128 ; A13 = (B12 - B13) * C[16]; 4129 ; B14 = (A12 - A14) * C[ 8]; 4130 ; B15 = (A13 - A15) * C[24]; 4131 ; A14 = B14 + B15; 4132 ; A15 = (B14 - B15) * C[16]; 4133 4134 op2 12 4135 000015E6 D94530 <1> fld _A(%1) 4136 000015E9 D84538 <1> fadd _A(%1+2) 4137 000015EC D94534 <1> fld _A(%1+1) 4138 000015EF D8453C <1> fadd _A(%1+3) 4139 000015F2 D9C1 <1> fld st1 4140 000015F4 D8E1 <1> fsub st0, st1 4141 000015F6 D808 <1> fmul _C16 4142 000015F8 D95D74 <1> fstp _B(%1+1) 4143 000015FB DEC1 <1> faddp st1 4144 000015FD D95D70 <1> fstp _B(%1) 4145 00001600 D94530 <1> fld _A(%1) 4146 00001603 
D86538 <1> fsub _A(%1+2) 4147 00001606 D84808 <1> fmul _C08 4148 00001609 D94534 <1> fld _A(%1+1) 4149 0000160C D8653C <1> fsub _A(%1+3) 4150 0000160F D8480C <1> fmul _C24 4151 00001612 D9C1 <1> fld st1 4152 00001614 D8E1 <1> fsub st0, st1 4153 00001616 D808 <1> fmul _C16 4154 00001618 D95D7C <1> fstp _B(%1+3) 4155 0000161B DEC1 <1> faddp st1 4156 0000161D D95D78 <1> fstp _B(%1+2) 4157 4158 ; V[ 5] = (V[11] = (V[13] = A07 + (V[15] = A15)) + A11) + A05 + A13; 4159 4160 00001620 D9457C fld _B(15) 4161 00001623 D952BC fst _V(15) 4162 00001626 D8455C fadd _B(7) 4163 00001629 D952B4 fst _V(13) 4164 0000162C D8456C fadd _B(11) 4165 0000162F D952AC fst _V(11) 4166 00001632 D84554 fadd _B(5) 4167 00001635 D84574 fadd _B(13) 4168 00001638 D95A94 fstp _V(5) 4169 4170 ; V[ 7] = (V[ 9] = A03 + A11 + A15) + A13; 4171 4172 0000163B D9454C fld _B(3) 4173 0000163E D8456C fadd _B(11) 4174 00001641 D8457C fadd _B(15) 4175 00001644 D952A4 fst _V(9) 4176 00001647 D84574 fadd _B(13) 4177 0000164A D95A9C fstp _V(7) 4178 4179 ; V[33] = -(V[ 1] = A01 + A09 + A13 + A15) - A14; 4180 4181 0000164D D94544 fld _B(1) 4182 00001650 D84564 fadd _B(9) 4183 00001653 D84574 fadd _B(13) 4184 00001656 D8457C fadd _B(15) 4185 00001659 D95284 fst _V(1) 4186 0000165C D84578 fadd _B(14) 4187 0000165F D9E0 fchs 4188 00001661 D95A04 fstp _V(33) 4189 4190 ; V[35] = -(V[ 3] = A05 + A07 + A09 + A13 + A15) - A06 - A14; 4191 4192 00001664 D94554 fld _B(5) 4193 00001667 D8455C fadd _B(7) 4194 0000166A D84564 fadd _B(9) 4195 0000166D D84574 fadd _B(13) 4196 00001670 D8457C fadd _B(15) 4197 00001673 D9528C fst _V(3) 4198 00001676 D84558 fadd _B(6) 4199 00001679 D84578 fadd _B(14) 4200 0000167C D9E0 fchs 4201 0000167E D95A0C fstp _V(35) 4202 4203 ; V[37] = (tmp = -(A10 + A11 + A13 + A14 + A15)) - A05 - A06 - A07; 4204 4205 00001681 D94568 fld _B(10) 4206 00001684 D8456C fadd _B(11) 4207 00001687 D84574 fadd _B(13) 4208 0000168A D84578 fadd _B(14) 4209 0000168D D8457C fadd _B(15) 4210 00001690 D9E0 fchs 4211 00001692 
D9C0 fld st0 4212 00001694 D86554 fsub _B(5) 4213 00001697 D86558 fsub _B(6) 4214 0000169A D8655C fsub _B(7) 4215 0000169D D95A14 fstp _V(37) 4216 4217 ; V[39] = tmp - A02 - A03; // depends on the statement above (reuses tmp) 4218 4219 000016A0 D9C0 fld st0 4220 000016A2 D86548 fsub _B(2) 4221 000016A5 D8654C fsub _B(3) 4222 000016A8 D95A1C fstp _V(39) 4223 4224 ; V[41] = (tmp += A13 - A12) - A02 - A03; // depends on the statement 2 above 4225 4226 000016AB D84574 fadd _B(13) 4227 000016AE D86570 fsub _B(12) 4228 000016B1 D9C0 fld st0 4229 000016B3 D86548 fsub _B(2) 4230 000016B6 D8654C fsub _B(3) 4231 000016B9 D95A24 fstp _V(41) 4232 4233 ; V[43] = tmp - A04 - A06 - A07; // depends on the statements 1 and 3 above 4234 4235 000016BC D86550 fsub _B(4) 4236 000016BF D86558 fsub _B(6) 4237 000016C2 D8655C fsub _B(7) 4238 000016C5 D95A2C fstp _V(43) 4239 4240 ; V[47] = (tmp = -(A08 + A12 + A14 + A15)) - A00; 4241 4242 000016C8 D94560 fld _B(8) 4243 000016CB D84570 fadd _B(12) 4244 000016CE D84578 fadd _B(14) 4245 000016D1 D8457C fadd _B(15) 4246 000016D4 D9E0 fchs 4247 000016D6 D9C0 fld st0 4248 000016D8 D86540 fsub _B(0) 4249 000016DB D95A3C fstp _V(47) 4250 4251 ; V[45] = tmp - A04 - A06 - A07; // depends on the statement above (reuses tmp) 4252 4253 000016DE D86550 fsub _B(4) 4254 000016E1 D86558 fsub _B(6) 4255 000016E4 D8655C fsub _B(7) 4256 000016E7 D95A34 fstp _V(45) 4257 4258 ; ((Uint32_t*)V)[32-32] = (Uint32_t)0x80000000L + ((Uint32_t*)V)[ 0-32]; 4259 ; ((Uint32_t*)V)[31-32] = (Uint32_t)0x80000000L + ((Uint32_t*)V)[ 1-32]; 4260 ; ((Uint32_t*)V)[30-32] = (Uint32_t)0x80000000L + ((Uint32_t*)V)[ 2-32]; 4261 ; ((Uint32_t*)V)[29-32] = (Uint32_t)0x80000000L + ((Uint32_t*)V)[ 3-32]; 4262 ; ((Uint32_t*)V)[28-32] = (Uint32_t)0x80000000L + ((Uint32_t*)V)[ 4-32]; 4263 ; ((Uint32_t*)V)[27-32] = (Uint32_t)0x80000000L + ((Uint32_t*)V)[ 5-32]; 4264 ; ((Uint32_t*)V)[26-32] = (Uint32_t)0x80000000L + ((Uint32_t*)V)[ 6-32]; 4265 ; ((Uint32_t*)V)[25-32] = (Uint32_t)0x80000000L + ((Uint32_t*)V)[ 7-32]; 4266 ; 
((Uint32_t*)V)[24-32] = (Uint32_t)0x80000000L + ((Uint32_t*)V)[ 8-32]; 4267 ; ((Uint32_t*)V)[23-32] = (Uint32_t)0x80000000L + ((Uint32_t*)V)[ 9-32]; 4268 ; ((Uint32_t*)V)[22-32] = (Uint32_t)0x80000000L + ((Uint32_t*)V)[10-32]; 4269 ; ((Uint32_t*)V)[21-32] = (Uint32_t)0x80000000L + ((Uint32_t*)V)[11-32]; 4270 ; ((Uint32_t*)V)[20-32] = (Uint32_t)0x80000000L + ((Uint32_t*)V)[12-32]; 4271 ; ((Uint32_t*)V)[19-32] = (Uint32_t)0x80000000L + ((Uint32_t*)V)[13-32]; 4272 ; ((Uint32_t*)V)[18-32] = (Uint32_t)0x80000000L + ((Uint32_t*)V)[14-32]; 4273 ; ((Uint32_t*)V)[17-32] = (Uint32_t)0x80000000L + ((Uint32_t*)V)[15-32]; 4274 4275 000016EA B900000080 mov ecx, 0x80000000 4276 %assign i 0 4277 %rep 16 4278 mov eax, _V(i) 4279 add eax, ecx 4280 mov _V(32-i), eax 4281 %assign i i+1 4282 %endrep 4283 000016EF 8B4280 <1> mov eax, _V(i) 4284 000016F2 01C8 <1> add eax, ecx 4285 000016F4 8902 <1> mov _V(32-i), eax 4286 <1> %assign i i+1 4287 000016F6 8B4284 <1> mov eax, _V(i) 4288 000016F9 01C8 <1> add eax, ecx 4289 000016FB 8942FC <1> mov _V(32-i), eax 4290 <1> %assign i i+1 4291 000016FE 8B4288 <1> mov eax, _V(i) 4292 00001701 01C8 <1> add eax, ecx 4293 00001703 8942F8 <1> mov _V(32-i), eax 4294 <1> %assign i i+1 4295 00001706 8B428C <1> mov eax, _V(i) 4296 00001709 01C8 <1> add eax, ecx 4297 0000170B 8942F4 <1> mov _V(32-i), eax 4298 <1> %assign i i+1 4299 0000170E 8B4290 <1> mov eax, _V(i) 4300 00001711 01C8 <1> add eax, ecx 4301 00001713 8942F0 <1> mov _V(32-i), eax 4302 <1> %assign i i+1 4303 00001716 8B4294 <1> mov eax, _V(i) 4304 00001719 01C8 <1> add eax, ecx 4305 0000171B 8942EC <1> mov _V(32-i), eax 4306 <1> %assign i i+1 4307 0000171E 8B4298 <1> mov eax, _V(i) 4308 00001721 01C8 <1> add eax, ecx 4309 00001723 8942E8 <1> mov _V(32-i), eax 4310 <1> %assign i i+1 4311 00001726 8B429C <1> mov eax, _V(i) 4312 00001729 01C8 <1> add eax, ecx 4313 0000172B 8942E4 <1> mov _V(32-i), eax 4314 <1> %assign i i+1 4315 0000172E 8B42A0 <1> mov eax, _V(i) 4316 00001731 01C8 <1> add eax, ecx 
4317 00001733 8942E0 <1> mov _V(32-i), eax 4318 <1> %assign i i+1 4319 00001736 8B42A4 <1> mov eax, _V(i) 4320 00001739 01C8 <1> add eax, ecx 4321 0000173B 8942DC <1> mov _V(32-i), eax 4322 <1> %assign i i+1 4323 0000173E 8B42A8 <1> mov eax, _V(i) 4324 00001741 01C8 <1> add eax, ecx 4325 00001743 8942D8 <1> mov _V(32-i), eax 4326 <1> %assign i i+1 4327 00001746 8B42AC <1> mov eax, _V(i) 4328 00001749 01C8 <1> add eax, ecx 4329 0000174B 8942D4 <1> mov _V(32-i), eax 4330 <1> %assign i i+1 4331 0000174E 8B42B0 <1> mov eax, _V(i) 4332 00001751 01C8 <1> add eax, ecx 4333 00001753 8942D0 <1> mov _V(32-i), eax 4334 <1> %assign i i+1 4335 00001756 8B42B4 <1> mov eax, _V(i) 4336 00001759 01C8 <1> add eax, ecx 4337 0000175B 8942CC <1> mov _V(32-i), eax 4338 <1> %assign i i+1 4339 0000175E 8B42B8 <1> mov eax, _V(i) 4340 00001761 01C8 <1> add eax, ecx 4341 00001763 8942C8 <1> mov _V(32-i), eax 4342 <1> %assign i i+1 4343 00001766 8B42BC <1> mov eax, _V(i) 4344 00001769 01C8 <1> add eax, ecx 4345 0000176B 8942C4 <1> mov _V(32-i), eax 4346 <1> %assign i i+1 4347 4348 ; ((Uint32_t*)V)[63-32] = ((Uint32_t*)V)[33-32]; 4349 ; ((Uint32_t*)V)[62-32] = ((Uint32_t*)V)[34-32]; 4350 ; ((Uint32_t*)V)[61-32] = ((Uint32_t*)V)[35-32]; 4351 ; ((Uint32_t*)V)[60-32] = ((Uint32_t*)V)[36-32]; 4352 ; ((Uint32_t*)V)[59-32] = ((Uint32_t*)V)[37-32]; 4353 ; ((Uint32_t*)V)[58-32] = ((Uint32_t*)V)[38-32]; 4354 ; ((Uint32_t*)V)[57-32] = ((Uint32_t*)V)[39-32]; 4355 ; ((Uint32_t*)V)[56-32] = ((Uint32_t*)V)[40-32]; 4356 ; ((Uint32_t*)V)[55-32] = ((Uint32_t*)V)[41-32]; 4357 ; ((Uint32_t*)V)[54-32] = ((Uint32_t*)V)[42-32]; 4358 ; ((Uint32_t*)V)[53-32] = ((Uint32_t*)V)[43-32]; 4359 ; ((Uint32_t*)V)[52-32] = ((Uint32_t*)V)[44-32]; 4360 ; ((Uint32_t*)V)[51-32] = ((Uint32_t*)V)[45-32]; 4361 ; ((Uint32_t*)V)[50-32] = ((Uint32_t*)V)[46-32]; 4362 ; ((Uint32_t*)V)[49-32] = ((Uint32_t*)V)[47-32]; 4363 4364 %assign i 1 4365 %rep 15 4366 mov eax, _V(32+i) 4367 mov _V(64-i), eax 4368 %assign i i+1 4369 %endrep 4370 
0000176E 8B4204 <1> mov eax, _V(32+i) 4371 00001771 89427C <1> mov _V(64-i), eax 4372 <1> %assign i i+1 4373 00001774 8B4208 <1> mov eax, _V(32+i) 4374 00001777 894278 <1> mov _V(64-i), eax 4375 <1> %assign i i+1 4376 0000177A 8B420C <1> mov eax, _V(32+i) 4377 0000177D 894274 <1> mov _V(64-i), eax 4378 <1> %assign i i+1 4379 00001780 8B4210 <1> mov eax, _V(32+i) 4380 00001783 894270 <1> mov _V(64-i), eax 4381 <1> %assign i i+1 4382 00001786 8B4214 <1> mov eax, _V(32+i) 4383 00001789 89426C <1> mov _V(64-i), eax 4384 <1> %assign i i+1 4385 0000178C 8B4218 <1> mov eax, _V(32+i) 4386 0000178F 894268 <1> mov _V(64-i), eax 4387 <1> %assign i i+1 4388 00001792 8B421C <1> mov eax, _V(32+i) 4389 00001795 894264 <1> mov _V(64-i), eax 4390 <1> %assign i i+1 4391 00001798 8B4220 <1> mov eax, _V(32+i) 4392 0000179B 894260 <1> mov _V(64-i), eax 4393 <1> %assign i i+1 4394 0000179E 8B4224 <1> mov eax, _V(32+i) 4395 000017A1 89425C <1> mov _V(64-i), eax 4396 <1> %assign i i+1 4397 000017A4 8B4228 <1> mov eax, _V(32+i) 4398 000017A7 894258 <1> mov _V(64-i), eax 4399 <1> %assign i i+1 4400 000017AA 8B422C <1> mov eax, _V(32+i) 4401 000017AD 894254 <1> mov _V(64-i), eax 4402 <1> %assign i i+1 4403 000017B0 8B4230 <1> mov eax, _V(32+i) 4404 000017B3 894250 <1> mov _V(64-i), eax 4405 <1> %assign i i+1 4406 000017B6 8B4234 <1> mov eax, _V(32+i) 4407 000017B9 89424C <1> mov _V(64-i), eax 4408 <1> %assign i i+1 4409 000017BC 8B4238 <1> mov eax, _V(32+i) 4410 000017BF 894248 <1> mov _V(64-i), eax 4411 <1> %assign i i+1 4412 000017C2 8B423C <1> mov eax, _V(32+i) 4413 000017C5 894244 <1> mov _V(64-i), eax 4414 <1> %assign i i+1 4415 4416 000017C8 83EC80 sub esp, byte -128 4417 000017CB 5D pop ebp 4418 endproc 4419 <1> %ifnctx proc 4420 <1> %error expected 'proc' before 'endproc'. 
4421 <1> %else 4422 <1> %if %$STACK > 0 4423 <1> add esp, %$STACK 4424 <1> %endif 4425 <1> 4426 <1> %if %$STACK <> (-%$STACKN) 4427 <1> %error STACKLEVEL mismatch check 'local', 'alloc', 'pushd', 'popd' 4428 <1> %endif 4429 <1> 4430 000017CC C3 <1> ret 4431 <1> %pop 4432 <1> %endif 4433 4434 ; 4435 ; end of synthasm.nas 4436 ;