/*
 * This file is part of the Advance project.
 *
 * Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004 Andrea Mazzoleni
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * In addition, as a special exception, Andrea Mazzoleni
 * gives permission to link the code of this program with
 * the MAME library (or with modified versions of MAME that use the
 * same license as MAME), and distribute linked combinations including
 * the two. You must obey the GNU General Public License in all
 * respects for all of the code used other than MAME. If you modify
 * this file, you may extend this exception to your version of the
 * file, but you are not obligated to do so. If you do not wish to
 * do so, delete this exception statement from your version.
 */

#ifndef __ICONV_H
#define __ICONV_H

#include "icommon.h"

/***************************************************************************/
/* internal conv */

#if defined(USE_ASM_INLINE)
/*
 * Per-channel masks applied after shifting a bgra8888 pixel down to its
 * bgr332 position. Every mask is stored twice so that a single 64-bit
 * load covers the two 32-bit pixels held in one MMX register.
 */
static uint32 bgra8888tobgr332_mask[] = {
	0x000000E0, 0x000000E0, /* r */
	0x0000001C, 0x0000001C, /* g */
	0x00000003, 0x00000003 /* b */
};

/*
 * Convert `count` pixels from 32-bit bgra8888 to 8-bit bgr332 using MMX.
 *
 * dst   - destination buffer, one byte written per pixel
 * src   - source buffer, one 32-bit word read per pixel
 * count - number of pixels to convert
 *
 * The assembler loop handles eight pixels per iteration (32 source bytes,
 * 8 destination bytes); the remaining count % 8 pixels are converted one
 * at a time by the C loop that follows. The asm advances `src` and `dst`
 * in place (they are read/write operands), so the C loop resumes exactly
 * where the assembler stopped.
 *
 * NOTE(review): the pointers are advanced with 32-bit `addl` and the asm
 * declares only "cc" as clobbered although it writes memory and the MMX
 * registers - confirm this is only built for 32-bit x86 targets.
 */
static inline void internal_convbgra8888tobgr332_mmx(void* dst, const void* src, unsigned count)
{
	/* pixels left over once the groups of eight are done */
	unsigned rest = count % 8;

	__asm__ __volatile__(
		/* count = number of 8 pixel groups; skip the loop if there is none */
		"shrl $3, %2\n"
		"jz 1f\n"
		/* mm5 = r mask, mm6 = g mask, mm7 = b mask */
		"movq (%3), %%mm5\n"
		"movq 8(%3), %%mm6\n"
		"movq 16(%3), %%mm7\n"
		ASM_JUMP_ALIGN
		"0:\n"
		/* pixels 0-1 -> mm0 */
		"movq (%0), %%mm0\n"
		"movq %%mm0, %%mm1\n"
		"movq %%mm0, %%mm2\n"
		"psrlq $16, %%mm0\n"
		"psrlq $11, %%mm1\n"
		"psrlq $6, %%mm2\n"
		"pand %%mm5, %%mm0\n"
		"pand %%mm6, %%mm1\n"
		"pand %%mm7, %%mm2\n"
		"por %%mm2, %%mm1\n"
		"por %%mm1, %%mm0\n"
		/* pixels 2-3 -> mm3 */
		"movq 8(%0), %%mm3\n"
		"movq %%mm3, %%mm1\n"
		"movq %%mm3, %%mm2\n"
		"psrlq $16, %%mm3\n"
		"psrlq $11, %%mm1\n"
		"psrlq $6, %%mm2\n"
		"pand %%mm5, %%mm3\n"
		"pand %%mm6, %%mm1\n"
		"pand %%mm7, %%mm2\n"
		"por %%mm2, %%mm1\n"
		"por %%mm1, %%mm3\n"
		/* mm0 = pixels 0-3, one per 16-bit word */
		"packuswb %%mm3, %%mm0\n"
		/* pixels 4-5 -> mm4 */
		"movq 16(%0), %%mm4\n"
		"movq %%mm4, %%mm1\n"
		"movq %%mm4, %%mm2\n"
		"psrlq $16, %%mm4\n"
		"psrlq $11, %%mm1\n"
		"psrlq $6, %%mm2\n"
		"pand %%mm5, %%mm4\n"
		"pand %%mm6, %%mm1\n"
		"pand %%mm7, %%mm2\n"
		"por %%mm2, %%mm1\n"
		"por %%mm1, %%mm4\n"
		/* pixels 6-7 -> mm3 */
		"movq 24(%0), %%mm3\n"
		"movq %%mm3, %%mm1\n"
		"movq %%mm3, %%mm2\n"
		"psrlq $16, %%mm3\n"
		"psrlq $11, %%mm1\n"
		"psrlq $6, %%mm2\n"
		"pand %%mm5, %%mm3\n"
		"pand %%mm6, %%mm1\n"
		"pand %%mm7, %%mm2\n"
		"por %%mm2, %%mm1\n"
		"por %%mm1, %%mm3\n"
		/* mm4 = pixels 4-7, then mm0 = the eight result bytes */
		"packuswb %%mm3, %%mm4\n"
		"packuswb %%mm4, %%mm0\n"
		"movq %%mm0, (%1)\n"
		/* advance: 32 source bytes, 8 destination bytes */
		"addl $32, %0\n"
		"addl $8, %1\n"
		"decl %2\n"
		"jnz 0b\n"
		"1:\n"
		: "+S" (src), "+D" (dst), "+c" (count)
		: "r" (bgra8888tobgr332_mask)
		: "cc"
	);

	if (rest) {
		const uint32* src32 = src;
		uint8* dst8 = dst;
		do {
			/* same shifts as the asm: b >> 6, g >> 11, r >> 16 */
			*dst8 = ((src32[0] >> (8-2)) & 0x03)
				| ((src32[0] >> (16-3-2)) & 0x1C)
				| ((src32[0] >> (24-3-3-2)) & 0xE0);
			++src32;
			++dst8;
			--rest;
		} while (rest);
	}
}
#endif
static inline void internal_convbgra8888tobgr332_def(void* dst, const void* src, unsigned count) { uint32* src32 = (uint32*)src; uint32* dst32 = (uint32*)dst; while (count) { #ifdef USE_LSB *dst32++ = ((src32[0] >> (8-2)) & 0x03) | ((src32[0] >> (16-3-2)) & 0x1C) | ((src32[0] >> (24-3-3-2)) & 0xE0) | ((src32[1] << -(8-2-8)) & 0x0300) | ((src32[1] >> (16-3-2-8)) & 0x1C00) | ((src32[1] >> (24-3-3-2-8)) & 0xE000) | ((src32[2] << -(8-2-16)) & 0x030000) | ((src32[2] << -(16-3-2-16)) & 0x1C0000) | ((src32[2] >> (24-3-3-2-16)) & 0xE00000) | ((src32[3] << -(8-2-24)) & 0x03000000) | ((src32[3] << -(16-3-2-24)) & 0x1C000000) | ((src32[3] << -(24-3-3-2-24)) & 0xE0000000); #else *dst32++ = ((src32[3] >> (8-2)) & 0x03) | ((src32[3] >> (16-3-2)) & 0x1C) | ((src32[3] >> (24-3-3-2)) & 0xE0) | ((src32[2] << -(8-2-8)) & 0x0300) | ((src32[2] >> (16-3-2-8)) & 0x1C00) | ((src32[2] >> (24-3-3-2-8)) & 0xE000) | ((src32[1] << -(8-2-16)) & 0x030000) | ((src32[1] << -(16-3-2-16)) & 0x1C0000) | ((src32[1] >> (24-3-3-2-16)) & 0xE00000) | ((src32[0] << -(8-2-24)) & 0x03000000) | ((src32[0] << -(16-3-2-24)) & 0x1C000000) | ((src32[0] << -(24-3-3-2-24)) & 0xE0000000); #endif src32 += 4; --count; } } #if defined(USE_ASM_INLINE) static uint32 bgra8888tobgr565_mask[] = { 0x00F80000, 0x00F80000, /* r << 8 */ 0x000007E0, 0x000007E0, /* g */ 0x0000001F, 0x0000001F /* b */ }; static inline void internal_convbgra8888tobgr565_mmx(void* dst, const void* src, unsigned count) { unsigned rest = count % 4; __asm__ __volatile__( "shrl $2, %2\n" "jz 1f\n" "movq (%3), %%mm5\n" "movq 8(%3), %%mm6\n" "movq 16(%3), %%mm7\n" ASM_JUMP_ALIGN "0:\n" "movq (%0), %%mm0\n" "movq %%mm0, %%mm1\n" "movq %%mm0, %%mm2\n" "psrld $5, %%mm1\n" "psrld $3, %%mm2\n" "pand %%mm5, %%mm0\n" "pand %%mm6, %%mm1\n" "pand %%mm7, %%mm2\n" "por %%mm2, %%mm1\n" "movq 8(%0), %%mm3\n" "movq %%mm3, %%mm4\n" "movq %%mm3, %%mm2\n" "psrld $5, %%mm4\n" "psrld $3, %%mm2\n" "pand %%mm5, %%mm3\n" "pand %%mm6, %%mm4\n" "pand %%mm7, %%mm2\n" "por %%mm2, 
%%mm4\n" "packuswb %%mm3, %%mm0\n" "packssdw %%mm4, %%mm1\n" "por %%mm1, %%mm0\n" "movq %%mm0, (%1)\n" "addl $16, %0\n" "addl $8, %1\n" "decl %2\n" "jnz 0b\n" "1:\n" : "+S" (src), "+D" (dst), "+c" (count) : "r" (bgra8888tobgr565_mask) : "cc" ); if (rest) { const uint32* src32 = src; uint16* dst16 = dst; do { *dst16 = ((src32[0] >> (8-5)) & 0x001F) | ((src32[0] >> (16-5-6)) & 0x07E0) | ((src32[0] >> (24-5-6-5)) & 0xF800); ++src32; ++dst16; --rest; } while (rest); } } #endif static inline void internal_convbgra8888tobgr565_def(void* dst, const void* src, unsigned count) { uint32* src32 = (uint32*)src; uint32* dst32 = (uint32*)dst; count /= 2; while (count) { #ifdef USE_LSB *dst32++ = ((src32[0] >> (8-5)) & 0x001F) | ((src32[0] >> (16-5-6)) & 0x07E0) | ((src32[0] >> (24-5-6-5)) & 0xF800) | ((src32[1] << -(8-5-16)) & 0x001F0000) | ((src32[1] << -(16-5-6-16)) & 0x07E00000) | ((src32[1] << -(24-5-6-5-16)) & 0xF8000000); #else *dst32++ = ((src32[1] >> (8-5)) & 0x001F) | ((src32[1] >> (16-5-6)) & 0x07E0) | ((src32[1] >> (24-5-6-5)) & 0xF800) | ((src32[0] << -(8-5-16)) & 0x001F0000) | ((src32[0] << -(16-5-6-16)) & 0x07E00000) | ((src32[0] << -(24-5-6-5-16)) & 0xF8000000); #endif src32 += 2; --count; } } #if defined(USE_ASM_INLINE) static uint32 bgra8888tobgra5551_mask[] = { 0x00007C00, 0x00007C00, /* r */ 0x000003E0, 0x000003E0, /* g */ 0x0000001F, 0x0000001F /* b */ }; static inline void internal_convbgra8888tobgra5551_mmx(void* dst, const void* src, unsigned count) { unsigned rest = count % 4; __asm__ __volatile__( "shrl $2, %2\n" "jz 1f\n" "movq (%3), %%mm5\n" "movq 8(%3), %%mm6\n" "movq 16(%3), %%mm7\n" ASM_JUMP_ALIGN "0:\n" "movq (%0), %%mm0\n" "movq %%mm0, %%mm1\n" "movq %%mm0, %%mm2\n" "psrld $9, %%mm0\n" "psrld $6, %%mm1\n" "psrld $3, %%mm2\n" "pand %%mm5, %%mm0\n" "pand %%mm6, %%mm1\n" "pand %%mm7, %%mm2\n" "por %%mm2, %%mm1\n" "por %%mm1, %%mm0\n" "movq 8(%0), %%mm3\n" "movq %%mm3, %%mm1\n" "movq %%mm3, %%mm2\n" "psrld $9, %%mm3\n" "psrld $6, %%mm1\n" "psrld $3, 
%%mm2\n" "pand %%mm5, %%mm3\n" "pand %%mm6, %%mm1\n" "pand %%mm7, %%mm2\n" "por %%mm2, %%mm1\n" "por %%mm1, %%mm3\n" "packssdw %%mm3, %%mm0\n" "movq %%mm0, (%1)\n" "addl $16, %0\n" "addl $8, %1\n" "decl %2\n" "jnz 0b\n" "1:\n" : "+S" (src), "+D" (dst), "+c" (count) : "r" (bgra8888tobgra5551_mask) : "cc" ); if (rest) { const uint32* src32 = src; uint16* dst16 = dst; do { *dst16 = ((src32[0] >> (8-5)) & 0x001F) | ((src32[0] >> (16-5-5)) & 0x03E0) | ((src32[0] >> (24-5-5-5)) & 0x7C00); ++src32; ++dst16; --rest; } while (rest); } } #endif static inline void internal_convbgra8888tobgra5551_def(void* dst, const void* src, unsigned count) { uint32* src32 = (uint32*)src; uint32* dst32 = (uint32*)dst; count /= 2; while (count) { #ifdef USE_LSB *dst32++ = ((src32[0] >> (8-5)) & 0x001F) | ((src32[0] >> (16-5-5)) & 0x03E0) | ((src32[0] >> (24-5-5-5)) & 0x7C00) | ((src32[1] << -(8-5-16)) & 0x001F0000) | ((src32[1] << -(16-5-5-16)) & 0x03E00000) | ((src32[1] << -(24-5-5-5-16)) & 0x7C000000); #else *dst32++ = ((src32[1] >> (8-5)) & 0x001F) | ((src32[1] >> (16-5-5)) & 0x03E0) | ((src32[1] >> (24-5-5-5)) & 0x7C00) | ((src32[0] << -(8-5-16)) & 0x001F0000) | ((src32[0] << -(16-5-5-16)) & 0x03E00000) | ((src32[0] << -(24-5-5-5-16)) & 0x7C000000); #endif src32 += 2; --count; } } #if defined(USE_ASM_INLINE) static uint32 bgra5551tobgr332_mask[] = { 0x00E000E0, 0x00E000E0, /* r */ 0x001C001C, 0x001C001C, /* g */ 0x00030003, 0x00030003 /* b */ }; static inline void internal_convbgra5551tobgr332_mmx(void* dst, const void* src, unsigned count) { unsigned rest = count % 8; __asm__ __volatile__( "shrl $3, %2\n" "jz 1f\n" "movq (%3), %%mm5\n" "movq 8(%3), %%mm6\n" "movq 16(%3), %%mm7\n" ASM_JUMP_ALIGN "0:\n" "movq (%0), %%mm0\n" "movq %%mm0, %%mm1\n" "movq %%mm0, %%mm2\n" "psrld $7, %%mm0\n" "psrld $5, %%mm1\n" "psrld $3, %%mm2\n" "pand %%mm5, %%mm0\n" "pand %%mm6, %%mm1\n" "pand %%mm7, %%mm2\n" "por %%mm2, %%mm1\n" "por %%mm1, %%mm0\n" "movq 8(%0), %%mm3\n" "movq %%mm3, %%mm1\n" "movq %%mm3, 
%%mm2\n" "psrld $7, %%mm3\n" "psrld $5, %%mm1\n" "psrld $3, %%mm2\n" "pand %%mm5, %%mm3\n" "pand %%mm6, %%mm1\n" "pand %%mm7, %%mm2\n" "por %%mm2, %%mm1\n" "por %%mm1, %%mm3\n" "packuswb %%mm3, %%mm0\n" "movq %%mm0, (%1)\n" "addl $16, %0\n" "addl $8, %1\n" "decl %2\n" "jnz 0b\n" "1:\n" : "+S" (src), "+D" (dst), "+c" (count) : "r" (bgra5551tobgr332_mask) : "cc" ); if (rest) { const uint16* src16 = src; uint8* dst8 = dst; do { *dst8 = ((src16[0] >> (5-2)) & 0x03) | ((src16[0] >> (10-3-2)) & 0x1C) | ((src16[0] >> (15-3-3-2)) & 0xE0); ++src16; ++dst8; --rest; } while (rest); } } #endif static inline void internal_convbgra5551tobgr332_def(void* dst, const void* src, unsigned count) { uint16* src16 = (uint16*)src; uint32* dst32 = (uint32*)dst; count /= 4; while (count) { #ifdef USE_LSB *dst32++ = ((src16[0] >> (5-2)) & 0x03) | ((src16[0] >> (10-3-2)) & 0x1C) | ((src16[0] >> (15-3-3-2)) & 0xE0) | ((src16[1] << -(5-2-8)) & 0x0300) | ((src16[1] << -(10-3-2-8)) & 0x1C00) | ((src16[1] << -(15-3-3-2-8)) & 0xE000) | ((src16[2] << -(5-2-16)) & 0x030000) | ((src16[2] << -(10-3-2-16)) & 0x1C0000) | ((src16[2] << -(15-3-3-2-16)) & 0xE00000) | ((src16[3] << -(5-2-24)) & 0x03000000) | ((src16[3] << -(10-3-2-24)) & 0x1C000000) | ((src16[3] << -(15-3-3-2-24)) & 0xE0000000); #else *dst32++ = ((src16[3] >> (5-2)) & 0x03) | ((src16[3] >> (10-3-2)) & 0x1C) | ((src16[3] >> (15-3-3-2)) & 0xE0) | ((src16[2] << -(5-2-8)) & 0x0300) | ((src16[2] << -(10-3-2-8)) & 0x1C00) | ((src16[2] << -(15-3-3-2-8)) & 0xE000) | ((src16[1] << -(5-2-16)) & 0x030000) | ((src16[1] << -(10-3-2-16)) & 0x1C0000) | ((src16[1] << -(15-3-3-2-16)) & 0xE00000) | ((src16[0] << -(5-2-24)) & 0x03000000) | ((src16[0] << -(10-3-2-24)) & 0x1C000000) | ((src16[0] << -(15-3-3-2-24)) & 0xE0000000); #endif src16 += 4; --count; } } #if defined(USE_ASM_INLINE) static uint32 bgra5551tobgr565_mask[] = { 0xFFC0FFC0, 0xFFC0FFC0, /* rg */ 0x001F001F, 0x001F001F /* b */ }; static inline void internal_convbgra5551tobgr565_mmx(void* dst, 
const void* src, unsigned count) { unsigned rest = count % 4; __asm__ __volatile__( "shrl $2, %2\n" "jz 1f\n" "movq (%3), %%mm2\n" "movq 8(%3), %%mm3\n" ASM_JUMP_ALIGN "0:\n" "movq (%0), %%mm0\n" "movq %%mm0, %%mm1\n" "pslld $1, %%mm0\n" "pand %%mm2, %%mm0\n" "pand %%mm3, %%mm1\n" "por %%mm1, %%mm0\n" "movq %%mm0, (%1)\n" "addl $8, %0\n" "addl $8, %1\n" "decl %2\n" "jnz 0b\n" "1:\n" : "+S" (src), "+D" (dst), "+c" (count) : "r" (bgra5551tobgr565_mask) : "cc" ); if (rest) { const uint16* src16 = src; uint16* dst16 = dst; do { *dst16 = (src16[0] & 0x001F) | ((src16[0] << 1) & 0xFFC0); ++src16; ++dst16; --rest; } while (rest); } } #endif static inline void internal_convbgra5551tobgr565_def(void* dst, const void* src, unsigned count) { uint32* src32 = (uint32*)src; uint32* dst32 = (uint32*)dst; count /= 2; while (count) { *dst32++ = (src32[0] & 0x001F001F) | ((src32[0] << 1) & 0xFFC0FFC0); src32 += 1; --count; } } #if defined(USE_ASM_INLINE) static uint32 bgra5551tobgra8888_mask[] = { 0x000000F8, 0x000000F8, /* r */ 0x0000F800, 0x0000F800, /* g */ 0x00F80000, 0x00F80000 /* b */ }; static inline void internal_convbgra5551tobgra8888_mmx(void* dst, const void* src, unsigned count) { unsigned rest = count % 2; __asm__ __volatile__( "shrl $1, %2\n" "jz 1f\n" "movq (%3), %%mm3\n" "movq 8(%3), %%mm4\n" "movq 16(%3), %%mm5\n" ASM_JUMP_ALIGN "0:\n" "movd (%0), %%mm0\n" "punpcklwd %%mm0, %%mm0\n" "movq %%mm0, %%mm1\n" "movq %%mm0, %%mm2\n" "pslld $3, %%mm0\n" "pslld $6, %%mm1\n" "pslld $9, %%mm2\n" "pand %%mm3, %%mm0\n" "pand %%mm4, %%mm1\n" "pand %%mm5, %%mm2\n" "por %%mm1, %%mm0\n" "por %%mm2, %%mm0\n" "movq %%mm0, (%1)\n" "addl $4, %0\n" "addl $8, %1\n" "decl %2\n" "jnz 0b\n" "1:\n" : "+S" (src), "+D" (dst), "+c" (count) : "r" (bgra5551tobgra8888_mask) : "cc" ); if (rest) { const uint16* src16 = src; uint32* dst32 = dst; do { *dst32 = ((src16[0] << 3) & 0x000000F8) | ((src16[0] << 6) & 0x0000F800) | ((src16[0] << 9) & 0x00F80000); ++src16; ++dst32; --rest; } while (rest); } } 
#endif /* USE_ASM_INLINE */

/*
 * Convert `count` pixels from 16-bit bgra5551 to 32-bit bgra8888 in plain C.
 *
 * dst   - destination buffer, one 32-bit word written per pixel
 * src   - source buffer, one 16-bit word read per pixel
 * count - number of pixels to convert (one per iteration)
 *
 * Each 5-bit channel is moved to the top of its destination byte; the low
 * three bits of every channel and the top byte of the result are zero.
 */
static inline void internal_convbgra5551tobgra8888_def(void* dst, const void* src, unsigned count)
{
	uint16* src16 = (uint16*)src;
	uint32* dst32 = (uint32*)dst;

	while (count) {
		*dst32++ = ((src16[0] << 3) & 0x000000F8)
			| ((src16[0] << 6) & 0x0000F800)
			| ((src16[0] << 9) & 0x00F80000);
		src16 += 1;
		--count;
	}
}

#if defined(USE_ASM_INLINE)

/*
Y = 0.299 R + 0.587 G + 0.114 B
U = -0.1687 R - 0.3313 G + 0.5 B + 128
V = 0.5 R - 0.4187 G - 0.0813 B + 128

Y = (76*R + 150*G + 29*B) >> 8
U = (-43*R - 84*G + 128*B) >> 8 + 128
V = (128*R - 107*G - 20*B) >> 8 + 128
*/
/*
 * Fixed point (8 fractional bits) coefficients for the formulas above,
 * laid out as four 16-bit lanes per row in the order y, u, y, v. The last
 * row is the +128 offset of u and v, pre-scaled by 256 (0x8000); the y
 * lanes get no offset.
 */
static uint32 bgra8888toyuy2_coeff[] = {
	/*uuuuyyyy vvvvyyyy */
	0x0080001d, 0xffec001d, /* b */
	0xffac0096, 0xff950096, /* g */
	0xffd5004c, 0x0080004c, /* r */
	0x80000000, 0x80000000 /* add */
};

/*
 * Convert two bgra8888 pixels to two YUY2 groups using MMX.
 *
 * dst  - destination, 8 bytes are written: y u y v for src0 followed by
 *        y u y v for src1
 * src0 - first source pixel (4 bytes read)
 * src1 - second source pixel (4 bytes read)
 *
 * Each source pixel yields a full 4-byte group in which both y bytes come
 * from that same pixel. The two pixels are processed with interleaved
 * instructions; the disabled `#if 0` section keeps the straightforward
 * single pixel sequence as a reference.
 */
static inline void pixel_convbgra8888toyuy2_mmx(void* dst, const void* src0, const void* src1)
{
	__asm__ __volatile__ (
#if 0 /* OSDEF Reference code */
		/* Basic single pixel implementation */

		/* mm0 = 0 a 0 r 0 g 0 b */

		/* transpose */
		"movq %%mm2, %%mm1\n"
		"punpcklwd %%mm2, %%mm1\n"
		"punpckhwd %%mm2, %%mm2\n"
		"movq %%mm1, %%mm0\n"
		"punpckldq %%mm2, %%mm2\n"
		"punpckldq %%mm1, %%mm0\n"
		"punpckhdq %%mm1, %%mm1\n"

		/* mm0 = 0 b 0 b 0 b 0 b */
		/* mm1 = 0 g 0 g 0 g 0 g */
		/* mm2 = 0 r 0 r 0 r 0 r */

		/* multiply */
		"pmullw 0(%3), %%mm0\n"
		"pmullw 8(%3), %%mm1\n"
		"pmullw 16(%3), %%mm2\n"

		/* add the component without saturation */
		"paddw %%mm1, %%mm0\n"
		"paddw 24(%3), %%mm2\n"
		"paddw %%mm2, %%mm0\n"

		/* reduce the precision */
		"psrlw $8, %%mm0\n"

		/* mm0 = 0 v 0 y 0 u 0 y */
#endif

		/* Fast double pixel implementation */
		/* pixel 0 goes through mm0/mm1/mm2, pixel 1 through mm3/mm4/mm5 */
		"movd (%0), %%mm2\n"
		"movd (%1), %%mm5\n"
		"pxor %%mm0, %%mm0\n"
		"punpcklbw %%mm0, %%mm2\n"
		"movq %%mm2, %%mm1\n"
		"punpcklwd %%mm2, %%mm1\n"
		"punpckhwd %%mm2, %%mm2\n"
		"movq %%mm1, %%mm0\n"
		"punpckldq %%mm2, %%mm2\n"
		"punpckldq %%mm1, %%mm0\n"
		"pmullw 0(%3), %%mm0\n"
		"pxor %%mm3, %%mm3\n"
		"punpckhdq %%mm1, %%mm1\n"
		"punpcklbw %%mm3, %%mm5\n"
		"pmullw 8(%3), %%mm1\n"
		"movq %%mm5, %%mm4\n"
		"punpcklwd %%mm5, %%mm4\n"
		"punpckhwd %%mm5, %%mm5\n"
		"pmullw 16(%3), %%mm2\n"
		"movq %%mm4, %%mm3\n"
		"punpckldq %%mm5, %%mm5\n"
		"punpckldq %%mm4, %%mm3\n"
		"punpckhdq %%mm4, %%mm4\n"
		"pmullw 0(%3), %%mm3\n"
		"paddw %%mm1, %%mm0\n"
		"paddw 24(%3), %%mm2\n"
		"pmullw 8(%3), %%mm4\n"
		"paddw %%mm2, %%mm0\n"
		"pmullw 16(%3), %%mm5\n"
		"psrlw $8, %%mm0\n"
		"paddw %%mm4, %%mm3\n"
		"paddw 24(%3), %%mm5\n"
		"paddw %%mm5, %%mm3\n"
		"psrlw $8, %%mm3\n"
		/* narrow both results to bytes and store the 8 output bytes */
		"packuswb %%mm3, %%mm0\n"
		"movq %%mm0, (%2)\n"
		:
		: "r" (src0), "r" (src1), "r" (dst), "r" (bgra8888toyuy2_coeff)
		: "cc", "memory"
	);
}
#endif

/*
 * Convert one bgra8888 pixel to one 4-byte YUY2 group in plain C.
 *
 * dst - destination, 4 bytes are written in the order y, u, y, v
 * src - source pixel, read byte by byte (byte order selected by USE_LSB)
 *
 * Uses 16 fractional bits for the coefficients. The negative intermediate
 * values of u and v rely on `>>` of a signed int behaving as an arithmetic
 * shift.
 */
static inline void pixel_convbgra8888toyuy2_def(void* dst, const void* src)
{
	const uint8* src8 = (const uint8*)src;
	uint8* dst8 = (uint8*)dst;
	int r, g, b;
	int y, u, v;

#ifdef USE_LSB
	b = src8[0];
	g = src8[1];
	r = src8[2];
#else
	b = src8[3];
	g = src8[2];
	r = src8[1];
#endif

	/*
	Y = 0.299 R + 0.587 G + 0.114 B
	U = -0.1687 R - 0.3313 G + 0.5 B + 128
	V = 0.5 R - 0.4187 G - 0.0813 B + 128
	*/
	y = ((19595*r + 38469*g + 7471*b) >> 16);
	u = ((-11055*r - 21712*g + 32768*b) >> 16) + 128;
	v = ((32768*r - 27439*g - 5328*b) >> 16) + 128;

	/* the same luma is stored twice, once on each side of u */
	dst8[0] = y;
	dst8[1] = u;
	dst8[2] = y;
	dst8[3] = v;
}

/*
 * Alpha blend one bgra8888 source pixel over one destination pixel in
 * plain C.
 *
 * dst - destination pixel, updated in place
 * src - source pixel; its alpha byte (byte 3 with USE_LSB, byte 0
 *       otherwise) selects the behaviour:
 *         0   - destination left untouched
 *         255 - the whole 32-bit source pixel is copied
 *         else - each colour byte becomes dst + (src - dst) * a / 256,
 *                the destination alpha byte is not modified
 */
static inline void pixel_alphabgra8888_def(void* dst, const void* src)
{
	int a;
	uint8* src8 = (uint8*)src;
	uint8* dst8 = (uint8*)dst;

#ifdef USE_LSB
	a = src8[3];
	if (a == 0) {
		/* nothing */
	} else if (a == 255) {
		uint32* src32 = (uint32*)src;
		uint32* dst32 = (uint32*)dst;
		*dst32 = *src32;
	} else {
		dst8[0] += (src8[0] - dst8[0]) * a / 256;
		dst8[1] += (src8[1] - dst8[1]) * a / 256;
		dst8[2] += (src8[2] - dst8[2]) * a / 256;
	}
#else
	a = src8[0];
	if (a == 0) {
		/* nothing */
	} else if (a == 255) {
		uint32* src32 = (uint32*)src;
		uint32* dst32 = (uint32*)dst;
		*dst32 = *src32;
	} else {
		dst8[1] += (src8[1] - dst8[1]) * a / 256;
		dst8[2] += (src8[2] - dst8[2]) * a / 256;
		dst8[3] += (src8[3] - dst8[3]) * a / 256;
	}
#endif
}

/*
 * Mask keeping the low byte of each of the four 16-bit lanes.
 *
 * NOTE(review): unlike the other MMX helpers in this file, this table and
 * pixel_alphabgra8888_mmx are not wrapped in USE_ASM_INLINE - confirm
 * whether that is intentional.
 */
static uint32 alphabgra8888_coeff[] = {
	0x00FF00FF, 0x00FF00FF
};

/*
 * Alpha blend one bgra8888 source pixel over one destination pixel using
 * MMX.
 *
 * dst - destination pixel, updated in place
 * src - source pixel; alpha is always read from byte 3
 *
 * Alpha 0 leaves the destination untouched and alpha 255 copies the source
 * pixel. Otherwise all four bytes, including the alpha byte, are replaced
 * by dst + (src - dst) * a / 256 computed on 16-bit lanes.
 */
static inline void pixel_alphabgra8888_mmx(void* dst, const void* src)
{
	uint8* src8 = (uint8*)src;

	if (src8[3] == 0) {
		/* nothing */
	} else if (src8[3] == 255) {
		uint32* src32 = (uint32*)src;
		uint32* dst32 = (uint32*)dst;
		*dst32 = *src32;
	} else {
		__asm__ __volatile__ (
			"movq (%2), %%mm3\n" /* mm3 = 00FF00FF00FF00FF (low byte of each word) */
			"movd (%0), %%mm1\n" /* mm1 = 0000ARGB (src) */
			"punpcklbw %%mm1, %%mm1\n" /* mm1 = AARRGGBB */
			"pand %%mm3, %%mm1\n" /* mm1 = 0A0R0G0B */
			"movd (%1), %%mm2\n" /* mm2 = 0000ARGB (dst) */
			"punpcklbw %%mm2, %%mm2\n" /* mm2 = AARRGGBB */
			"pand %%mm3, %%mm2\n" /* mm2 = 0A0R0G0B */
			"movq %%mm1, %%mm4\n" /* mm4 = 0A0R0G0B */
			"psrlq $48, %%mm4\n" /* mm4 = 0000000A */
			"punpcklwd %%mm4, %%mm4\n" /* mm4 = 00000A0A */
			"punpcklwd %%mm4, %%mm4\n" /* mm4 = 0A0A0A0A */
			"psubw %%mm2, %%mm1\n" /* mm1 = src - dst */
			"pmullw %%mm4, %%mm1\n" /* mm1 = (alpha) * (src - dst) */
			"psrlw $8, %%mm1\n" /* mm1 = (alpha) * (src - dst) / 256 */
			"paddw %%mm1, %%mm2\n" /* mm2 = dst + (alpha) * (src - dst) / 256 */
			"pand %%mm3, %%mm2\n" /* mm2 = 0A0R0G0B */
			"packuswb %%mm2, %%mm2\n" /* mm2 = ARGBARGB */
			"movd %%mm2, (%1)\n" /* store the blended pixel back to dst */
			:
			: "r" (src), "r" (dst), "r" (alphabgra8888_coeff)
			: "cc", "memory"
		);
	}
}

#endif