/* Copyright (C) 1996, 1997, 1998, 1999 artofcode LLC.  All rights reserved.

  This program is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
  Free Software Foundation; either version 2 of the License, or (at your
  option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  for more details.

  You should have received a copy of the GNU General Public License along
  with this program; if not, write to the Free Software Foundation, Inc.,
  59 Temple Place, Suite 330, Boston, MA, 02111-1307.

*/

/*$Id: gsflip.c,v 1.2.6.1.2.1 2003/01/17 00:49:02 giles Exp $ */
/* Routines for "flipping" image data */
#include "gx.h"
#include "gserrors.h"		/* for rangecheck in sample macros */
#include "gsbitops.h"
#include "gsbittab.h"
#include "gsflip.h"

#define ARCH_HAS_BYTE_REGS 1

/*
 * Transpose a block of bits between registers.
 *
 * Exchanges the bits of r selected by mask with the bits of s selected
 * by (mask << shift); all other bits of r and s are left unchanged.
 *
 * The macro uses a scratch variable named `temp' that must be declared
 * in the scope of the call site.  r and s are evaluated more than once,
 * so they must be side-effect-free lvalues.
 *
 * The expansion is wrapped in do { } while (0) and every parameter is
 * parenthesized so that the macro behaves as a single statement (e.g.
 * as the body of an unbraced `if') and is safe with compound arguments.
 */
#define TRANSPOSE(r,s,mask,shift)\
  do {\
    (r) ^= (temp = (((s) >> (shift)) ^ (r)) & (mask));\
    (s) ^= temp << (shift);\
  } while (0)

/* Define the size of byte temporaries.  On Intel CPUs, this should be */
/* byte, but on all other CPUs, it should be uint. */
#if ARCH_HAS_BYTE_REGS
typedef byte byte_var;
#else
typedef uint byte_var;
#endif

/*
 * Build the initializer of a 256-entry table by forwarding the eight
 * per-bit values (v80 for bit 0x80 down to v1 for bit 0x01) to
 * bit_table_8 with a first argument of 0.  bit_table_8 is not defined
 * in this file (presumably it comes from gsbittab.h).
 */
#define VTAB(v80,v40,v20,v10,v8,v4,v2,v1)\
  bit_table_8(0,v80,v40,v20,v10,v8,v4,v2,v1)

/* Convert 3Mx1 to 3x1.
*/ private int flip3x1(byte * buffer, const byte ** planes, int offset, int nbytes) { byte *out = buffer; const byte *in1 = planes[0] + offset; const byte *in2 = planes[1] + offset; const byte *in3 = planes[2] + offset; int n = nbytes; static const bits32 tab3x1[256] = { VTAB(0x800000, 0x100000, 0x20000, 0x4000, 0x800, 0x100, 0x20, 4) }; for (; n > 0; out += 3, ++in1, ++in2, ++in3, --n) { bits32 b24 = tab3x1[*in1] | (tab3x1[*in2] >> 1) | (tab3x1[*in3] >> 2); out[0] = (byte) (b24 >> 16); out[1] = (byte) (b24 >> 8); out[2] = (byte) b24; } return 0; } /* Convert 3Mx2 to 3x2. */ private int flip3x2(byte * buffer, const byte ** planes, int offset, int nbytes) { byte *out = buffer; const byte *in1 = planes[0] + offset; const byte *in2 = planes[1] + offset; const byte *in3 = planes[2] + offset; int n = nbytes; static const bits32 tab3x2[256] = { VTAB(0x800000, 0x400000, 0x20000, 0x10000, 0x800, 0x400, 0x20, 0x10) }; for (; n > 0; out += 3, ++in1, ++in2, ++in3, --n) { bits32 b24 = tab3x2[*in1] | (tab3x2[*in2] >> 2) | (tab3x2[*in3] >> 4); out[0] = (byte) (b24 >> 16); out[1] = (byte) (b24 >> 8); out[2] = (byte) b24; } return 0; } /* Convert 3Mx4 to 3x4. */ private int flip3x4(byte * buffer, const byte ** planes, int offset, int nbytes) { byte *out = buffer; const byte *in1 = planes[0] + offset; const byte *in2 = planes[1] + offset; const byte *in3 = planes[2] + offset; int n = nbytes; for (; n > 0; out += 3, ++in1, ++in2, ++in3, --n) { byte_var b1 = *in1, b2 = *in2, b3 = *in3; out[0] = (b1 & 0xf0) | (b2 >> 4); out[1] = (b3 & 0xf0) | (b1 & 0xf); out[2] = (byte) (b2 << 4) | (b3 & 0xf); } return 0; } /* Convert 3Mx8 to 3x8. 
*/ private int flip3x8(byte * buffer, const byte ** planes, int offset, int nbytes) { byte *out = buffer; const byte *in1 = planes[0] + offset; const byte *in2 = planes[1] + offset; const byte *in3 = planes[2] + offset; int n = nbytes; for (; n > 0; out += 3, ++in1, ++in2, ++in3, --n) { out[0] = *in1; out[1] = *in2; out[2] = *in3; } return 0; } /* Convert 3Mx12 to 3x12. */ private int flip3x12(byte * buffer, const byte ** planes, int offset, int nbytes) { byte *out = buffer; const byte *pa = planes[0] + offset; const byte *pb = planes[1] + offset; const byte *pc = planes[2] + offset; int n = nbytes; /* * We assume that the input is an integral number of pixels, and * round up n to a multiple of 3. */ for (; n > 0; out += 9, pa += 3, pb += 3, pc += 3, n -= 3) { byte_var a1 = pa[1], b0 = pb[0], b1 = pb[1], b2 = pb[2], c1 = pc[1]; out[0] = pa[0]; out[1] = (a1 & 0xf0) | (b0 >> 4); out[2] = (byte) ((b0 << 4) | (b1 >> 4)); out[3] = pc[0]; out[4] = (c1 & 0xf0) | (a1 & 0xf); out[5] = pa[2]; out[6] = (byte) ((b1 << 4) | (b2 >> 4)); out[7] = (byte) ((b2 << 4) | (c1 & 0xf)); out[8] = pc[2]; } return 0; } /* Convert 4Mx1 to 4x1. */ private int flip4x1(byte * buffer, const byte ** planes, int offset, int nbytes) { byte *out = buffer; const byte *in1 = planes[0] + offset; const byte *in2 = planes[1] + offset; const byte *in3 = planes[2] + offset; const byte *in4 = planes[3] + offset; int n = nbytes; for (; n > 0; out += 4, ++in1, ++in2, ++in3, ++in4, --n) { byte_var b1 = *in1, b2 = *in2, b3 = *in3, b4 = *in4; byte_var temp; /* Transpose blocks of 1 */ TRANSPOSE(b1, b2, 0x55, 1); TRANSPOSE(b3, b4, 0x55, 1); /* Transpose blocks of 2 */ TRANSPOSE(b1, b3, 0x33, 2); TRANSPOSE(b2, b4, 0x33, 2); /* There's probably a faster way to do this.... */ out[0] = (b1 & 0xf0) | (b2 >> 4); out[1] = (b3 & 0xf0) | (b4 >> 4); out[2] = (byte) ((b1 << 4) | (b2 & 0xf)); out[3] = (byte) ((b3 << 4) | (b4 & 0xf)); } return 0; } /* Convert 4Mx2 to 4x2. 
*/ private int flip4x2(byte * buffer, const byte ** planes, int offset, int nbytes) { byte *out = buffer; const byte *in1 = planes[0] + offset; const byte *in2 = planes[1] + offset; const byte *in3 = planes[2] + offset; const byte *in4 = planes[3] + offset; int n = nbytes; for (; n > 0; out += 4, ++in1, ++in2, ++in3, ++in4, --n) { byte_var b1 = *in1, b2 = *in2, b3 = *in3, b4 = *in4; byte_var temp; /* Transpose blocks of 4x2 */ TRANSPOSE(b1, b3, 0x0f, 4); TRANSPOSE(b2, b4, 0x0f, 4); /* Transpose blocks of 2x1 */ TRANSPOSE(b1, b2, 0x33, 2); TRANSPOSE(b3, b4, 0x33, 2); out[0] = b1; out[1] = b2; out[2] = b3; out[3] = b4; } return 0; } /* Convert 4Mx4 to 4x4. */ private int flip4x4(byte * buffer, const byte ** planes, int offset, int nbytes) { byte *out = buffer; const byte *in1 = planes[0] + offset; const byte *in2 = planes[1] + offset; const byte *in3 = planes[2] + offset; const byte *in4 = planes[3] + offset; int n = nbytes; for (; n > 0; out += 4, ++in1, ++in2, ++in3, ++in4, --n) { byte_var b1 = *in1, b2 = *in2, b3 = *in3, b4 = *in4; out[0] = (b1 & 0xf0) | (b2 >> 4); out[1] = (b3 & 0xf0) | (b4 >> 4); out[2] = (byte) ((b1 << 4) | (b2 & 0xf)); out[3] = (byte) ((b3 << 4) | (b4 & 0xf)); } return 0; } /* Convert 4Mx8 to 4x8. */ private int flip4x8(byte * buffer, const byte ** planes, int offset, int nbytes) { byte *out = buffer; const byte *in1 = planes[0] + offset; const byte *in2 = planes[1] + offset; const byte *in3 = planes[2] + offset; const byte *in4 = planes[3] + offset; int n = nbytes; for (; n > 0; out += 4, ++in1, ++in2, ++in3, ++in4, --n) { out[0] = *in1; out[1] = *in2; out[2] = *in3; out[3] = *in4; } return 0; } /* Convert 4Mx12 to 4x12. 
*/ private int flip4x12(byte * buffer, const byte ** planes, int offset, int nbytes) { byte *out = buffer; const byte *pa = planes[0] + offset; const byte *pb = planes[1] + offset; const byte *pc = planes[2] + offset; const byte *pd = planes[3] + offset; int n = nbytes; /* * We assume that the input is an integral number of pixels, and * round up n to a multiple of 3. */ for (; n > 0; out += 12, pa += 3, pb += 3, pc += 3, pd += 3, n -= 3) { byte_var a1 = pa[1], b1 = pb[1], c1 = pc[1], d1 = pd[1]; { byte_var v0; out[0] = pa[0]; v0 = pb[0]; out[1] = (a1 & 0xf0) | (v0 >> 4); out[2] = (byte) ((v0 << 4) | (b1 >> 4)); out[3] = pc[0]; v0 = pd[0]; out[4] = (c1 & 0xf0) | (v0 >> 4); out[5] = (byte) ((v0 << 4) | (d1 >> 4)); } { byte_var v2; v2 = pa[2]; out[6] = (byte) ((a1 << 4) | (v2 >> 4)); out[7] = (byte) ((v2 << 4) | (b1 & 0xf)); out[8] = pb[2]; v2 = pc[2]; out[9] = (byte) ((c1 << 4) | (v2 >> 4)); out[10] = (byte) ((v2 << 4) | (d1 & 0xf)); out[11] = pd[2]; } } return 0; } /* Convert NMx{1,2,4,8} to Nx{1,2,4,8}. */ private int flipNx1to8(byte * buffer, const byte ** planes, int offset, int nbytes, int num_planes, int bits_per_sample) { /* This is only needed for DeviceN colors, so it can be slow. */ uint mask = (1 << bits_per_sample) - 1; int bi, pi; sample_store_declare_setup(dptr, dbit, dbbyte, buffer, 0, bits_per_sample); for (bi = 0; bi < nbytes * 8; bi += bits_per_sample) { for (pi = 0; pi < num_planes; ++pi) { const byte *sptr = planes[pi] + offset + (bi >> 3); uint value = (*sptr >> (8 - (bi & 7) - bits_per_sample)) & mask; sample_store_next8(value, dptr, dbit, bits_per_sample, dbbyte); } } sample_store_flush(dptr, dbit, bits_per_sample, dbbyte); return 0; } /* Convert NMx12 to Nx12. */ private int flipNx12(byte * buffer, const byte ** planes, int offset, int nbytes, int num_planes, int ignore_bits_per_sample) { /* This is only needed for DeviceN colors, so it can be slow. 
*/ int bi, pi; sample_store_declare_setup(dptr, dbit, dbbyte, buffer, 0, 12); for (bi = 0; bi < nbytes * 8; bi += 12) { for (pi = 0; pi < num_planes; ++pi) { const byte *sptr = planes[pi] + offset + (bi >> 3); uint value = (bi & 4 ? ((*sptr & 0xf) << 8) | sptr[1] : (*sptr << 4) | (sptr[1] >> 4)); sample_store_next_12(value, dptr, dbit, dbbyte); } } sample_store_flush(dptr, dbit, 12, dbbyte); return 0; } /* Flip data given number of planes and bits per pixel. */ typedef int (*image_flip_proc) (P4(byte *, const byte **, int, int)); private int flip_fail(byte * buffer, const byte ** planes, int offset, int nbytes) { return -1; } private const image_flip_proc image_flip3_procs[13] = { flip_fail, flip3x1, flip3x2, flip_fail, flip3x4, flip_fail, flip_fail, flip_fail, flip3x8, flip_fail, flip_fail, flip_fail, flip3x12 }; private const image_flip_proc image_flip4_procs[13] = { flip_fail, flip4x1, flip4x2, flip_fail, flip4x4, flip_fail, flip_fail, flip_fail, flip4x8, flip_fail, flip_fail, flip_fail, flip4x12 }; typedef int (*image_flipN_proc) (P6(byte *, const byte **, int, int, int, int)); private int flipN_fail(byte * buffer, const byte ** planes, int offset, int nbytes, int num_planes, int bits_per_sample) { return -1; } private const image_flipN_proc image_flipN_procs[13] = { flipN_fail, flipNx1to8, flipNx1to8, flipN_fail, flipNx1to8, flipN_fail, flipN_fail, flipN_fail, flipNx1to8, flipN_fail, flipN_fail, flipN_fail, flipNx12 }; /* Here is the public interface to all of the above. 
*/
/*
 * image_flip_planes: dispatch to the flip routine matching the plane
 * count and sample depth.
 *
 *   buffer           destination for the interleaved data
 *   planes           array of num_planes source plane pointers
 *   offset           byte offset applied to every plane pointer
 *   nbytes           number of bytes to process from each plane
 *   num_planes       3 and 4 use the dedicated tables; any other
 *                    non-negative count uses the generic N-plane table
 *   bits_per_sample  index into the dispatch table; must be 1..12
 *
 * Returns -1 if bits_per_sample is outside 1..12 or num_planes is
 * negative; otherwise returns whatever the selected routine returns
 * (-1 from the *_fail placeholders for unsupported depths).
 */
int
image_flip_planes(byte * buffer, const byte ** planes, int offset, int nbytes,
                  int num_planes, int bits_per_sample)
{
    /* Guard the table index before any lookup. */
    if (!(bits_per_sample >= 1 && bits_per_sample <= 12))
        return -1;

    if (num_planes == 3)
        return image_flip3_procs[bits_per_sample]
            (buffer, planes, offset, nbytes);

    if (num_planes == 4)
        return image_flip4_procs[bits_per_sample]
            (buffer, planes, offset, nbytes);

    if (num_planes < 0)
        return -1;

    return image_flipN_procs[bits_per_sample]
        (buffer, planes, offset, nbytes, num_planes, bits_per_sample);
}