/******************************************************************************
*
* NSSDC/CDF Huffman compression/decompression.
*
* Version 1.0b, 2-Sep-97, Hughes STX.
*
* Modification history:
*
* V1.0 22-Jul-96, J Love Original version...actually a heavily modified
* version of algorithms originally written by ?.
* V1.0a 28-Feb-97, J Love Windows NT for MS Visual C/C++ on an IBM PC.
* V1.0b 2-Sep-97, J Love Type casting for ANSI C.
*
******************************************************************************/
#include "cdflib.h"
#if (!SUPPORT_HUFF || !SUPPORT_AHUFF) && defined(BORLANDC)
#pragma warn -par
#endif
/******************************************************************************
* Macros/prototypes.
******************************************************************************/
typedef uByte BYTE;
typedef uInt16 WORD;
typedef uInt32 DWORD;
typedef uInt INT; /* Needed to pass a WORD by value. */
/*
* The NODE structure is a node in the Huffman decoding tree. It has a
* count, which is its weight in the tree, and the node numbers of its
* two children. The saved_count member of the structure is only
* there for debugging purposes, and can be safely taken out at any
* time. It just holds the intial count for each of the symbols, since
* the count member is continually being modified as the tree grows.
*/
typedef struct tree_node {
WORD count;
WORD saved_count;
int child_0;
int child_1;
} NODE;
/*
* A Huffman tree is set up for decoding, not encoding. When encoding,
* I first walk through the tree and build up a table of codes for
* each symbol. The codes are stored in this CODE structure.
*/
typedef struct code {
WORD code;
int code_bits;
} CODE;
/*
* The special EOS symbol is 256, the first available symbol after all
* of the possible bytes. When decoding, reading this symbols
* indicates that all of the data has been read in.
*/
#define END_OF_STREAM 256
typedef struct bit_file {
vFILE *file;
BYTE mask;
int rack;
} BIT_FILE;
#define END_OF_STREAM 256
#define ESCAPE 257
#define SYMBOL_COUNT 258
#define NODE_TABLE_COUNT ((SYMBOL_COUNT*2)-1)
#define ROOT_NODE 0
#define MAX_WEIGHT 0x8000
/*
* This data structure is all that is needed to maintain an adaptive
* Huffman tree for both encoding and decoding. The leaf array is a
* set of indices into the nodes that indicate which node is the
* parent of a symbol. For example, to encode 'A', we would find the
* leaf node by way of leaf[ 'A' ]. The next_free_node index is used
* to tell which node is the next one in the array that can be used.
* Since nodes are allocated when characters are read in for the first
* time, this pointer keeps track of where we are in the node array.
* Finally, the array of nodes is the actual Huffman tree. The child
* index is either an index pointing to a pair of children, or an
* actual symbol value, depending on whether 'child_is_leaf' is true
* or false.
*/
typedef struct tree {
int leaf[ SYMBOL_COUNT ];
int next_free_node;
struct node {
WORD weight;
int parent;
int child_is_leaf;
int child;
} nodes[ NODE_TABLE_COUNT ];
} TREE;
#if SUPPORT_HUFF || SUPPORT_AHUFF
static BIT_FILE *StartBitFile PROTOARGs((vFILE *fp));
static Logical EndOutputBitFile PROTOARGs((BIT_FILE *bit_file));
static Logical EndInputBitFile PROTOARGs((BIT_FILE *bit_file));
static Logical OutputBits PROTOARGs((
BIT_FILE *bit_file, DWORD code, int count
));
static int InputBit PROTOARGs((BIT_FILE *bit_file));
#endif
#if SUPPORT_HUFF
static Logical output_counts PROTOARGs((BIT_FILE *output, NODE *nodes));
static Logical count_bytes PROTOARGs((
vFILE *input, DWORD *counts, Int32 iSize
));
static void scale_counts PROTOARGs((DWORD *counts, NODE *nodes));
static int build_tree PROTOARGs((NODE *nodes));
static void convert_tree_to_code PROTOARGs((
NODE *nodes, CODE *codes, INT code_so_far, int bits, int node
));
static CDFstatus compress_data PROTOARGs((
vFILE *input, BIT_FILE *output, CODE *codes, Int32 iSize, CDFstatus iError,
CDFstatus oError
));
static Logical input_counts PROTOARGs((BIT_FILE *input, NODE *nodes));
static CDFstatus expand_data PROTOARGs((
BIT_FILE *input, vFILE *output, NODE *nodes, int root_node, CDFstatus iError,
CDFstatus oError
));
#endif
#if SUPPORT_AHUFF
static void InitializeTree PROTOARGs((TREE *tree));
static Logical EncodeSymbol PROTOARGs((TREE *tree, INT c, BIT_FILE *output));
static int DecodeSymbol PROTOARGs((TREE *tree, BIT_FILE *input));
static void UpdateModel PROTOARGs((TREE *tree, int c));
static void RebuildTree PROTOARGs((TREE *tree));
static void swap_nodes PROTOARGs((TREE *tree, int i, int j));
static void add_new_node PROTOARGs((TREE *tree, int c));
static DWORD InputBits PROTOARGs((BIT_FILE *bit_file, int bit_count));
#endif
/******************************************************************************
* CompressHUFF0.
******************************************************************************/
STATICforIDL CDFstatus CompressHUFF0 (input, iOffset, iSize, iError,
oFp, oOffset, oSize, oError)
vFILE *input;
Int32 iOffset;
Int32 iSize;
CDFstatus iError;
vFILE *oFp;
Int32 oOffset;
Int32 *oSize;
CDFstatus oError;
{
#if SUPPORT_HUFF
BIT_FILE *output; DWORD *counts; NODE *nodes; CODE *codes;
CDFstatus pStatus = CDF_OK; int root_node; long newOffset;
if (!SEEKv(input,(long)iOffset,vSEEK_SET)) return iError;
if (!SEEKv(oFp,(long)oOffset,vSEEK_SET)) return oError;
output = StartBitFile (oFp);
if (output == NULL) return BAD_MALLOC;
*oSize = 0;
counts = (DWORD *) CallocateMemory (256, sizeof(DWORD), NULL);
if (counts == NULL) {
cdf_FreeMemory (output, NULL);
return BAD_MALLOC;
}
nodes = (NODE *) CallocateMemory (514, sizeof(NODE), NULL);
if (nodes == NULL) {
cdf_FreeMemory (counts, NULL);
cdf_FreeMemory (output, NULL);
return BAD_MALLOC;
}
codes = (CODE *) CallocateMemory (257, sizeof(CODE), NULL);
if (codes == NULL) {
cdf_FreeMemory (nodes, NULL);
cdf_FreeMemory (counts, NULL);
cdf_FreeMemory (output, NULL);
return BAD_MALLOC;
}
if (!count_bytes(input,counts,iSize)) {
cdf_FreeMemory (codes, NULL);
cdf_FreeMemory (nodes, NULL);
cdf_FreeMemory (counts, NULL);
cdf_FreeMemory (output, NULL);
return iError;
}
scale_counts (counts, nodes);
if (!output_counts(output,nodes)) {
cdf_FreeMemory (codes, NULL);
cdf_FreeMemory (nodes, NULL);
cdf_FreeMemory (counts, NULL);
cdf_FreeMemory (output, NULL);
return oError;
}
root_node = build_tree (nodes);
convert_tree_to_code (nodes, codes, (INT) 0, 0, root_node);
if (!sX(compress_data(input,output,codes,iSize,iError,oError),&pStatus)) {
cdf_FreeMemory (codes, NULL);
cdf_FreeMemory (nodes, NULL);
cdf_FreeMemory (counts, NULL);
cdf_FreeMemory (output, NULL);
return pStatus;
}
if (!EndOutputBitFile(output)) {
cdf_FreeMemory (codes, NULL);
cdf_FreeMemory (nodes, NULL);
cdf_FreeMemory (counts, NULL);
return oError;
}
newOffset = V_tell (oFp);
if (newOffset == EOF) {
cdf_FreeMemory (codes, NULL);
cdf_FreeMemory (nodes, NULL);
cdf_FreeMemory (counts, NULL);
return oError;
}
*oSize = newOffset - oOffset;
cdf_FreeMemory (codes, NULL);
cdf_FreeMemory (nodes, NULL);
cdf_FreeMemory (counts, NULL);
return pStatus;
#else
return UNKNOWN_COMPRESSION;
#endif
}
/******************************************************************************
* DecompressHUFF0.
******************************************************************************/
STATICforIDL CDFstatus DecompressHUFF0 (iFp, iOffset, iError,
output, oOffset, oError)
vFILE *iFp;
Int32 iOffset;
CDFstatus iError;
vFILE *output;
Int32 oOffset;
CDFstatus oError;
{
#if SUPPORT_HUFF
CDFstatus pStatus = CDF_OK; BIT_FILE *input; NODE *nodes; int root_node;
if (!SEEKv(iFp,(long)iOffset,vSEEK_SET)) return iError;
if (!SEEKv(output,(long)oOffset,vSEEK_SET)) return oError;
input = StartBitFile (iFp);
if (input == NULL) return BAD_MALLOC;
nodes = (NODE *) CallocateMemory (514, sizeof(NODE), NULL);
if (nodes == NULL) {
cdf_FreeMemory (input, NULL);
return BAD_MALLOC;
}
if (!input_counts(input,nodes)) {
cdf_FreeMemory (nodes, NULL);
cdf_FreeMemory (input, NULL);
return iError;
}
root_node = build_tree( nodes );
if (!sX(expand_data(input,output,nodes,root_node,iError,oError),&pStatus)) {
cdf_FreeMemory (nodes, NULL);
cdf_FreeMemory (input, NULL);
return pStatus;
}
if (!EndInputBitFile(input)) {
cdf_FreeMemory (nodes, NULL);
return iError;
}
cdf_FreeMemory (nodes, NULL);
return pStatus;
#else
return UNKNOWN_COMPRESSION;
#endif
}
/******************************************************************************
* CompressAHUFF0.
******************************************************************************/
/*
* The high level view of the compression routine is very simple.
* First, we initialize the Huffman tree, with just the ESCAPE and
* END_OF_STREAM symbols. Then, we sit in a loop, encoding symbols,
* and adding them to the model. When there are no more characters
* to send, the special END_OF_STREAM symbol is encoded. The decoder
* will later be able to use this symbol to know when to quit.
*/
STATICforIDL CDFstatus CompressAHUFF0 (input, iOffset, iSize, iError,
oFp, oOffset, oSize, oError)
vFILE *input;
Int32 iOffset;
Int32 iSize;
CDFstatus iError;
vFILE *oFp;
Int32 oOffset;
Int32 *oSize;
CDFstatus oError;
{
#if SUPPORT_AHUFF
CDFstatus pStatus = CDF_OK; BIT_FILE *output; TREE *Tree;
int c; Int32 i; long newOffset;
if (!SEEKv(input,(long)iOffset,vSEEK_SET)) return iError;
if (!SEEKv(oFp,(long)oOffset,vSEEK_SET)) return oError;
output = StartBitFile (oFp);
if (output == NULL) return BAD_MALLOC;
*oSize = 0;
Tree = (TREE *) CallocateMemory (1, sizeof(TREE), NULL);
if (Tree == NULL) {
cdf_FreeMemory (output, NULL);
return BAD_MALLOC;
}
InitializeTree (Tree);
for (i = 0; i < iSize; i++) {
if ((c = V_getc(input)) == EOF) {
cdf_FreeMemory (Tree, NULL);
cdf_FreeMemory (output, NULL);
return iError;
}
if (!EncodeSymbol(Tree,(INT)c,output)) {
cdf_FreeMemory (Tree, NULL);
cdf_FreeMemory (output, NULL);
return oError;
}
UpdateModel (Tree, c);
}
if (!EncodeSymbol(Tree,(INT)END_OF_STREAM,output)) {
cdf_FreeMemory (Tree, NULL);
cdf_FreeMemory (output, NULL);
return oError;
}
if (!EndOutputBitFile(output)) {
cdf_FreeMemory (Tree, NULL);
return oError;
}
newOffset = V_tell (oFp);
if (newOffset == EOF) {
cdf_FreeMemory (Tree, NULL);
return oError;
}
*oSize = newOffset - oOffset;
cdf_FreeMemory (Tree, NULL);
return pStatus;
#else
return UNKNOWN_COMPRESSION;
#endif
}
/******************************************************************************
* DecompressAHUFF0.
******************************************************************************/
STATICforIDL CDFstatus DecompressAHUFF0 (iFp, iOffset, iError,
output, oOffset, oError)
vFILE *iFp;
Int32 iOffset;
CDFstatus iError;
vFILE *output;
Int32 oOffset;
CDFstatus oError;
{
#if SUPPORT_AHUFF
CDFstatus pStatus = CDF_OK; BIT_FILE *input; TREE *Tree; int c;
if (!SEEKv(iFp,(long)iOffset,vSEEK_SET)) return iError;
if (!SEEKv(output,(long)oOffset,vSEEK_SET)) return oError;
input = StartBitFile (iFp);
if (input == NULL) return BAD_MALLOC;
Tree = (TREE *) CallocateMemory (1, sizeof(TREE), NULL);
if (Tree == NULL) {
cdf_FreeMemory (input, NULL);
return BAD_MALLOC;
}
InitializeTree (Tree);
while ((c = DecodeSymbol(Tree,input)) != END_OF_STREAM) {
if (c == EOF) {
cdf_FreeMemory (Tree, NULL);
cdf_FreeMemory (input, NULL);
return iError;
}
if (V_putc(c,output) == EOF) {
cdf_FreeMemory (Tree, NULL);
cdf_FreeMemory (input, NULL);
return oError;
}
UpdateModel (Tree, c);
}
if (!EndInputBitFile(input)) {
cdf_FreeMemory (Tree, NULL);
return iError;
}
cdf_FreeMemory (Tree, NULL);
return pStatus;
#else
return UNKNOWN_COMPRESSION;
#endif
}
#if SUPPORT_HUFF
/******************************************************************************
* output_counts.
******************************************************************************/
/*
* In order for the compressor to build the same model, I have to store
* the symbol counts in the compressed file so the expander can read
* them in. In order to save space, I don't save all 256 symbols
* unconditionally. The format used to store counts looks like this:
*
* start, stop, counts, start, stop, counts, ... 0
*
* This means that I store runs of counts, until all the non-zero
* counts have been stored. At this time the list is terminated by
* storing a start value of 0. Note that at least 1 run of counts has
* to be stored, so even if the first start value is 0, I read it in.
* It also means that even in an empty file that has no counts, I have
* to pass at least one count.
*
* In order to efficiently use this format, I have to identify runs of
* non-zero counts. Because of the format used, I don't want to stop a
* run because of just one or two zeros in the count stream. So I have
* to sit in a loop looking for strings of three or more zero values in
* a row.
*
* This is simple in concept, but it ends up being one of the most
* complicated routines in the whole program. A routine that just
* writes out 256 values without attempting to optimize would be much
* simpler, but would hurt compression quite a bit on small files.
*
*/
static Logical output_counts (output, nodes)
BIT_FILE *output;
NODE *nodes;
{
int first;
int last;
int next;
int i;
first = 0;
while ( first < 255 && nodes[ first ].count == 0 )
first++;
/*
* Each time I hit the start of the loop, I assume that first is the
* number for a run of non-zero values. The rest of the loop is
* concerned with finding the value for last, which is the end of the
* run, and the value of next, which is the start of the next run.
* At the end of the loop, I assign next to first, so it starts in on
* the next run.
*/
for ( ; first < 256 ; first = next ) {
last = first + 1;
for ( ; ; ) {
for ( ; last < 256 ; last++ )
if ( nodes[ last ].count == 0 )
break;
last--;
for ( next = last + 1; next < 256 ; next++ )
if ( nodes[ next ].count != 0 )
break;
if ( next > 255 )
break;
if ( ( next - last ) > 3 )
break;
last = next;
};
/*
* Here is where I output first, last, and all the counts in between.
*/
if (V_putc(first,output->file) != first) return FALSE;
if (V_putc(last,output->file) != last) return FALSE;
for ( i = first ; i <= last ; i++ ) {
if (V_putc((int) nodes[ i ].count, output->file ) !=
(int) nodes[ i ].count ) return FALSE;
}
}
if (V_putc(0,output->file) != 0) return FALSE;
return TRUE;
}
#endif
#if SUPPORT_HUFF
/******************************************************************************
* count_bytes.
******************************************************************************/
/*
* This routine counts the frequency of occurence of every byte in
* the input file. It marks the place in the input stream where it
* started, counts up all the bytes, then returns to the place where
* it started. In most C implementations, the length of a file
* cannot exceed an unsigned long, so this routine should always
* work.
*/
static Logical count_bytes( input, counts, iSize )
vFILE *input;
DWORD *counts;
Int32 iSize;
{
long input_marker;
int c;
Int32 i;
input_marker = V_tell (input);
if (input_marker == EOF) return FALSE;
for (i = 0; i < iSize; i++) {
if ((c = V_getc(input)) == EOF) return FALSE;
counts[c]++;
}
if (!SEEKv(input,input_marker,vSEEK_SET)) return FALSE;
return TRUE;
}
#endif
#if SUPPORT_HUFF
/******************************************************************************
* scale_counts.
******************************************************************************/
/*
* In order to limit the size of my Huffman codes to 16 bits, I scale
* my counts down so they fit in an unsigned char, and then store them
* all as initial weights in my NODE array. The only thing to be
* careful of is to make sure that a node with a non-zero count doesn't
* get scaled down to 0. Nodes with values of 0 don't get codes.
*/
static void scale_counts( counts, nodes )
DWORD *counts;
NODE *nodes;
{
DWORD max_count;
int i;
max_count = 0;
for ( i = 0 ; i < 256 ; i++ )
if ( counts[ i ] > max_count )
max_count = counts[ i ];
if ( max_count == 0 ) {
counts[ 0 ] = 1;
max_count = 1;
}
max_count = max_count / 255;
max_count = max_count + 1;
for ( i = 0 ; i < 256 ; i++ ) {
nodes[ i ].count = (WORD) ( counts[ i ] / max_count );
if ( nodes[ i ].count == 0 && counts[ i ] != 0 )
nodes[ i ].count = 1;
}
nodes[ END_OF_STREAM ].count = 1;
}
#endif
#if SUPPORT_HUFF
/******************************************************************************
* build_tree.
******************************************************************************/
/*
* Building the Huffman tree is fairly simple. All of the active nodes
* are scanned in order to locate the two nodes with the minimum
* weights. These two weights are added together and assigned to a new
* node. The new node makes the two minimum nodes into its 0 child
* and 1 child. The two minimum nodes are then marked as inactive.
* This process repeats until their is only one node left, which is the
* root node. The tree is done, and the root node is passed back
* to the calling routine.
*
* Node 513 is used here to arbitratily provide a node with a guaranteed
* maximum value. It starts off being min_1 and min_2. After all active
* nodes have been scanned, I can tell if there is only one active node
* left by checking to see if min_1 is still 513.
*/
static int build_tree( nodes )
NODE *nodes;
{
int next_free;
int i;
int min_1;
int min_2;
nodes[ 513 ].count = 0xffff;
for ( next_free = END_OF_STREAM + 1 ; ; next_free++ ) {
min_1 = 513;
min_2 = 513;
for ( i = 0 ; i < next_free ; i++ )
if ( nodes[ i ].count != 0 ) {
if ( nodes[ i ].count < nodes[ min_1 ].count ) {
min_2 = min_1;
min_1 = i;
} else if ( nodes[ i ].count < nodes[ min_2 ].count )
min_2 = i;
}
if ( min_2 == 513 )
break;
nodes[ next_free ].count = nodes[ min_1 ].count
+ nodes[ min_2 ].count;
nodes[ min_1 ].saved_count = nodes[ min_1 ].count;
nodes[ min_1 ].count = 0;
nodes[ min_2 ].saved_count = nodes[ min_2 ].count;
nodes[ min_2 ].count = 0;
nodes[ next_free ].child_0 = min_1;
nodes[ next_free ].child_1 = min_2;
}
next_free--;
nodes[ next_free ].saved_count = nodes[ next_free ].count;
return( next_free );
}
#endif
#if SUPPORT_HUFF
/******************************************************************************
* convert_tree_to_code.
******************************************************************************/
/*
* Since the Huffman tree is built as a decoding tree, there is
* no simple way to get the encoding values for each symbol out of
* it. This routine recursively walks through the tree, adding the
* child bits to each code until it gets to a leaf. When it gets
* to a leaf, it stores the code value in the CODE element, and
* returns.
*/
static void convert_tree_to_code( nodes, codes, code_so_far, bits, node )
NODE *nodes;
CODE *codes;
INT code_so_far;
int bits;
int node;
{
if (node <= END_OF_STREAM) {
codes[node].code = (WORD) code_so_far;
codes[node].code_bits = bits;
return;
}
code_so_far <<= 1;
bits++;
convert_tree_to_code (nodes, codes, code_so_far, bits, nodes[node].child_0 );
convert_tree_to_code (nodes, codes, (INT) (code_so_far | 1),
bits, nodes[node].child_1 );
return;
}
#endif
#if SUPPORT_HUFF
/******************************************************************************
* compress_data.
******************************************************************************/
/*
* Once the tree gets built, and the CODE table is built, compressing
* the data is a breeze. Each byte is read in, and its corresponding
* Huffman code is sent out.
*/
static CDFstatus compress_data( input, output, codes, iSize, iError, oError)
vFILE *input;
BIT_FILE *output;
CODE *codes;
Int32 iSize;
CDFstatus iError;
CDFstatus oError;
{
int c;
long i;
for (i = 0; i < iSize; i++) {
if ((c = V_getc(input)) == EOF) return iError;
if (!OutputBits(output,(DWORD)codes[c].code,codes[ c ].code_bits)) {
return oError;
}
}
if (!OutputBits(output,(DWORD)codes[END_OF_STREAM].code,
codes[END_OF_STREAM].code_bits)) return oError;
return CDF_OK;
}
#endif
#if SUPPORT_HUFF || SUPPORT_AHUFF
/******************************************************************************
* StartBitFile.
******************************************************************************/
static BIT_FILE *StartBitFile (fp)
vFILE *fp;
{
BIT_FILE *bit_file;
bit_file = (BIT_FILE *) CallocateMemory (1, sizeof(BIT_FILE), NULL);
if (bit_file == NULL) return NULL;
bit_file->file = fp;
bit_file->rack = 0;
bit_file->mask = 0x80;
return bit_file;
}
#endif
#if SUPPORT_HUFF || SUPPORT_AHUFF
/******************************************************************************
* EndOutputBitFile.
******************************************************************************/
static Logical EndOutputBitFile (bit_file)
BIT_FILE *bit_file;
{
Logical status = TRUE;
if (bit_file->mask != 0x80) {
if (V_putc(bit_file->rack,
bit_file->file) != bit_file->rack) status = FALSE;
}
cdf_FreeMemory (bit_file, NULL);
return status;
}
#endif
#if SUPPORT_HUFF || SUPPORT_AHUFF
/******************************************************************************
* EndInputBitFile.
******************************************************************************/
static Logical EndInputBitFile (bit_file)
BIT_FILE *bit_file;
{
cdf_FreeMemory (bit_file, NULL);
return TRUE;
}
#endif
#if SUPPORT_HUFF || SUPPORT_AHUFF
/******************************************************************************
* OutputBits.
******************************************************************************/
static Logical OutputBits( bit_file, code, count )
BIT_FILE *bit_file;
DWORD code;
int count;
{
DWORD mask;
mask = 1L << ( count - 1 );
while ( mask != 0) {
if ( mask & code ) bit_file->rack |= bit_file->mask;
bit_file->mask >>= 1;
if ( bit_file->mask == 0 ) {
if (V_putc( bit_file->rack, bit_file->file ) != bit_file->rack ) {
return FALSE;
}
bit_file->rack = 0;
bit_file->mask = 0x80;
}
mask >>= 1;
}
return TRUE;
}
#endif
#if SUPPORT_HUFF || SUPPORT_AHUFF
/******************************************************************************
* InputBit.
******************************************************************************/
static int InputBit (bit_file)
BIT_FILE *bit_file;
{
int value;
if ( bit_file->mask == 0x80 ) {
bit_file->rack = V_getc( bit_file->file );
if ( bit_file->rack == EOF ) return EOF;
}
value = bit_file->rack & bit_file->mask;
bit_file->mask >>= 1;
if ( bit_file->mask == 0 ) bit_file->mask = 0x80;
return ( value ? 1 : 0 );
}
#endif
#if SUPPORT_AHUFF
/******************************************************************************
* InputBits.
******************************************************************************/
static DWORD InputBits( bit_file, bit_count )
BIT_FILE *bit_file;
int bit_count;
{
DWORD mask;
DWORD return_value;
mask = 1L << ( bit_count - 1 );
return_value = 0;
while ( mask != 0) {
if ( bit_file->mask == 0x80 ) {
bit_file->rack = V_getc( bit_file->file );
if ( bit_file->rack == EOF ) return ((DWORD) EOF);
}
if ( bit_file->rack & bit_file->mask ) return_value |= mask;
mask >>= 1;
bit_file->mask >>= 1;
if ( bit_file->mask == 0 ) bit_file->mask = 0x80;
}
return( return_value );
}
#endif
#if SUPPORT_HUFF
/******************************************************************************
* input_counts.
******************************************************************************/
/*
* When expanding, I have to read in the same set of counts. This is
* quite a bit easier that the process of writing them out, since no
* decision making needs to be done. All I do is read in first, check
* to see if I am all done, and if not, read in last and a string of
* counts.
*/
static Logical input_counts( input, nodes )
BIT_FILE *input;
NODE *nodes;
{
int first;
int last;
int i;
int c;
for ( i = 0 ; i < 256 ; i++ ) nodes[ i ].count = 0;
if ( ( first = V_getc( input->file ) ) == EOF ) return FALSE;
if ( ( last = V_getc( input->file ) ) == EOF ) return FALSE;
for ( ; ; ) {
for ( i = first ; i <= last ; i++ ) {
if ( ( c = V_getc( input->file ) ) == EOF ) return FALSE;
nodes[ i ].count = (WORD) c;
}
if ( ( first = V_getc( input->file ) ) == EOF ) return FALSE;
if ( first == 0 ) break;
if ( ( last = V_getc( input->file ) ) == EOF ) return FALSE;
}
nodes[ END_OF_STREAM ].count = 1;
return TRUE;
}
#endif
#if SUPPORT_HUFF
/******************************************************************************
* expand_data.
******************************************************************************/
/*
* Expanding compressed data is a little harder than the compression
* phase. As each new symbol is decoded, the tree is traversed,
* starting at the root node, reading a bit in, and taking either the
* child_0 or child_1 path. Eventually, the tree winds down to a
* leaf node, and the corresponding symbol is output. If the symbol
* is the END_OF_STREAM symbol, it doesn't get written out, and
* instead the whole process terminates.
*/
static CDFstatus expand_data( input, output, nodes, root_node, iError, oError)
BIT_FILE *input;
vFILE *output;
NODE *nodes;
int root_node;
CDFstatus iError;
CDFstatus oError;
{
int node;
for ( ; ; ) {
node = root_node;
do {
int bit = InputBit (input);
if (bit == EOF) return iError;
if (bit)
node = nodes[ node ].child_1;
else
node = nodes[ node ].child_0;
} while ( node > END_OF_STREAM );
if ( node == END_OF_STREAM ) break;
if ( ( V_putc( node, output ) ) != node ) return oError;
}
return CDF_OK;
}
#endif
#if SUPPORT_AHUFF
/******************************************************************************
* InitializeTree.
******************************************************************************/
/*
* When performing adaptive compression, the Huffman tree starts out
* very nearly empty. The only two symbols present initially are the
* ESCAPE symbol and the END_OF_STREAM symbol. The ESCAPE symbol has to
* be included so we can tell the expansion prog that we are transmitting a
* previously unseen symbol. The END_OF_STREAM symbol is here because
* it is greater than eight bits, and our ESCAPE sequence only allows for
* eight bit symbols following the ESCAPE code.
*
* In addition to setting up the root node and its two children, this
* routine also initializes the leaf array. The ESCAPE and END_OF_STREAM
* leaf elements are the only ones initially defined, the rest of the leaf
* elements are set to -1 to show that they aren't present in the
* Huffman tree yet.
*/
static void InitializeTree( tree )
TREE *tree;
{
int i;
tree->nodes[ ROOT_NODE ].child = ROOT_NODE + 1;
tree->nodes[ ROOT_NODE ].child_is_leaf = FALSE;
tree->nodes[ ROOT_NODE ].weight = 2;
tree->nodes[ ROOT_NODE ].parent = -1;
tree->nodes[ ROOT_NODE + 1 ].child = END_OF_STREAM;
tree->nodes[ ROOT_NODE + 1 ].child_is_leaf = TRUE;
tree->nodes[ ROOT_NODE + 1 ].weight = 1;
tree->nodes[ ROOT_NODE + 1 ].parent = ROOT_NODE;
tree->leaf[ END_OF_STREAM ] = ROOT_NODE + 1;
tree->nodes[ ROOT_NODE + 2 ].child = ESCAPE;
tree->nodes[ ROOT_NODE + 2 ].child_is_leaf = TRUE;
tree->nodes[ ROOT_NODE + 2 ].weight = 1;
tree->nodes[ ROOT_NODE + 2 ].parent = ROOT_NODE;
tree->leaf[ ESCAPE ] = ROOT_NODE + 2;
tree->next_free_node = ROOT_NODE + 3;
for ( i = 0 ; i < END_OF_STREAM ; i++ ) tree->leaf[ i ] = -1;
}
#endif
#if SUPPORT_AHUFF
/******************************************************************************
* EncodeSymbol.
******************************************************************************/
/*
* This routine is responsible for taking a symbol, and converting
* it into the sequence of bits dictated by the Huffman tree. The
* only complication is that we are working are way up from the leaf
* to the root, and hence are getting the bits in reverse order. This
* means we have to rack up the bits in an integer and then send them
* out after they are all accumulated. In this version of the program,
* we keep our codes in a long integer, so the maximum count is set
* to an arbitray limit of 0x8000. It could be set as high as 65535
* if desired.
*/
static Logical EncodeSymbol( tree, c, output )
TREE *tree;
INT c;
BIT_FILE *output;
{
DWORD code;
DWORD current_bit;
int code_size;
int current_node;
code = 0;
current_bit = 1;
code_size = 0;
current_node = tree->leaf[(int)c];
if ( current_node == -1 ) current_node = tree->leaf[ ESCAPE ];
while ( current_node != ROOT_NODE ) {
if ( ( current_node & 1 ) == 0 ) code |= current_bit;
current_bit <<= 1;
code_size++;
current_node = tree->nodes[ current_node ].parent;
};
if (!OutputBits(output,code,code_size)) return FALSE;
if ( tree->leaf[(int)c] == -1 ) {
if (!OutputBits(output,(DWORD)c,8)) return FALSE;
add_new_node( tree, (int) c );
}
return TRUE;
}
#endif
#if SUPPORT_AHUFF
/******************************************************************************
* DecodeSymbol.
******************************************************************************/
/*
* Decoding symbols is easy. We start at the root node, then go down
* the tree until we reach a leaf. At each node, we decide which
* child to take based on the next input bit. After getting to the
* leaf, we check to see if we read in the ESCAPE code. If we did,
* it means that the next symbol is going to come through in the next
* eight bits, unencoded. If that is the case, we read it in here,
* and add the new symbol to the table.
*/
static int DecodeSymbol (tree, input)
TREE *tree;
BIT_FILE *input;
{
int current_node;
int c;
int bit;
current_node = ROOT_NODE;
while ( !tree->nodes[ current_node ].child_is_leaf ) {
current_node = tree->nodes[ current_node ].child;
bit = InputBit (input);
if (bit == EOF) return EOF;
current_node += bit;
}
c = tree->nodes[ current_node ].child;
if ( c == ESCAPE ) {
c = (int) InputBits( input, 8 );
if (c == EOF) return EOF;
add_new_node( tree, c );
}
return( c );
}
#endif
#if SUPPORT_AHUFF
/******************************************************************************
* UpdateModel.
******************************************************************************/
/*
* UpdateModel is called to increment the count for a given symbol.
* After incrementing the symbol, this code has to work its way up
* through the parent nodes, incrementing each one of them. That is
* the easy part. The hard part is that after incrementing each
* parent node, we have to check to see if it is now out of the proper
* order. If it is, it has to be moved up the tree into its proper
* place.
*/
static void UpdateModel( tree, c )
TREE *tree;
int c;
{
int current_node;
int new_node;
if ( tree->nodes[ ROOT_NODE].weight == MAX_WEIGHT ) RebuildTree( tree );
current_node = tree->leaf[ c ];
while ( current_node != -1 ) {
tree->nodes[ current_node ].weight++;
for ( new_node = current_node ; new_node > ROOT_NODE ; new_node-- )
if ( tree->nodes[ new_node - 1 ].weight >=
tree->nodes[ current_node ].weight )
break;
if ( current_node != new_node ) {
swap_nodes( tree, current_node, new_node );
current_node = new_node;
}
current_node = tree->nodes[ current_node ].parent;
}
return;
}
#endif
#if SUPPORT_AHUFF
/******************************************************************************
* RebuildTree.
******************************************************************************/
/*
* Rebuilding the tree takes place when the counts have gone too
* high. From a simple point of view, rebuilding the tree just means that
* we divide every count by two. Unfortunately, due to truncation effects,
* this means that the tree's shape might change. Some nodes might move
* up due to cumulative increases, while others may move down.
*/
static void RebuildTree( tree )
TREE *tree;
{
int i;
int j;
int k;
WORD weight;
/*
* To start rebuilding the table, I collect all the leaves of the Huffman
* tree and put them in the end of the tree. While I am doing that, I
* scale the counts down by a factor of 2.
*/
j = tree->next_free_node - 1;
for ( i = j ; i >= ROOT_NODE ; i-- ) {
if ( tree->nodes[ i ].child_is_leaf ) {
tree->nodes[ j ] = tree->nodes[ i ];
tree->nodes[ j ].weight = (tree->nodes[j].weight + 1) / ((WORD) 2);
j--;
}
}
/*
* At this point, j points to the first free node. I now have all the
* leaves defined, and need to start building the higher nodes on the
* tree. I will start adding the new internal nodes at j. Every time
* I add a new internal node to the top of the tree, I have to check to
* see where it really belongs in the tree. It might stay at the top,
* but there is a good chance I might have to move it back down. If it
* does have to go down, I use the memmove() function to scoot everyone
* bigger up by one node. Note that memmove() may have to be change
* to memcpy() on some UNIX systems. The parameters are unchanged, as
* memmove and memcpy have the same set of parameters.
*/
for ( i = tree->next_free_node - 2 ; j >= ROOT_NODE ; i -= 2, j-- ) {
k = i + 1;
tree->nodes[ j ].weight = tree->nodes[ i ].weight +
tree->nodes[ k ].weight;
weight = tree->nodes[ j ].weight;
tree->nodes[ j ].child_is_leaf = FALSE;
for ( k = j + 1 ; weight < tree->nodes[ k ].weight ; k++ )
;
k--;
memmove( &tree->nodes[ j ], &tree->nodes[ j + 1 ],
( k - j ) * sizeof( struct node ) );
tree->nodes[ k ].weight = weight;
tree->nodes[ k ].child = i;
tree->nodes[ k ].child_is_leaf = FALSE;
}
/*
* The final step in tree reconstruction is to go through and set up
* all of the leaf and parent members. This can be safely done now
* that every node is in its final position in the tree.
*/
for ( i = tree->next_free_node - 1 ; i >= ROOT_NODE ; i-- ) {
if ( tree->nodes[ i ].child_is_leaf ) {
k = tree->nodes[ i ].child;
tree->leaf[ k ] = i;
} else {
k = tree->nodes[ i ].child;
tree->nodes[ k ].parent = tree->nodes[ k + 1 ].parent = i;
}
}
return;
}
#endif
#if SUPPORT_AHUFF
/******************************************************************************
* swap_nodes.
******************************************************************************/
/*
* Swapping nodes takes place when a node has grown too big for its
* spot in the tree. When swapping nodes i and j, we rearrange the
* tree by exchanging the children under i with the children under j.
*/
static void swap_nodes( tree, i, j )
TREE *tree;
int i;
int j;
{
struct node temp;
if ( tree->nodes[ i ].child_is_leaf )
tree->leaf[ tree->nodes[ i ].child ] = j;
else {
tree->nodes[ tree->nodes[ i ].child ].parent = j;
tree->nodes[ tree->nodes[ i ].child + 1 ].parent = j;
}
if ( tree->nodes[ j ].child_is_leaf )
tree->leaf[ tree->nodes[ j ].child ] = i;
else {
tree->nodes[ tree->nodes[ j ].child ].parent = i;
tree->nodes[ tree->nodes[ j ].child + 1 ].parent = i;
}
temp = tree->nodes[ i ];
tree->nodes[ i ] = tree->nodes[ j ];
tree->nodes[ i ].parent = temp.parent;
temp.parent = tree->nodes[ j ].parent;
tree->nodes[ j ] = temp;
return;
}
#endif
#if SUPPORT_AHUFF
/******************************************************************************
* add_new_node.
******************************************************************************/
/*
* Adding a new node to the tree is pretty simple. It is just a matter
* of splitting the lightest-weight node in the tree, which is the highest
* valued node. We split it off into two new nodes, one of which is the
* one being added to the tree. We assign the new node a weight of 0,
* so the tree doesn't have to be adjusted. It will be updated later when
* the normal update process occurs. Note that this code assumes that
* the lightest node has a leaf as a child. If this is not the case,
* the tree would be broken.
*/
static void add_new_node( tree, c )
TREE *tree;
int c;
{
int lightest_node;
int new_node;
int zero_weight_node;
lightest_node = tree->next_free_node - 1;
new_node = tree->next_free_node;
zero_weight_node = tree->next_free_node + 1;
tree->next_free_node += 2;
tree->nodes[ new_node ] = tree->nodes[ lightest_node ];
tree->nodes[ new_node ].parent = lightest_node;
tree->leaf[ tree->nodes[ new_node ].child ] = new_node;
tree->nodes[ lightest_node ].child = new_node;
tree->nodes[ lightest_node ].child_is_leaf = FALSE;
tree->nodes[ zero_weight_node ].child = c;
tree->nodes[ zero_weight_node ].child_is_leaf = TRUE;
tree->nodes[ zero_weight_node ].weight = 0;
tree->nodes[ zero_weight_node ].parent = lightest_node;
tree->leaf[ c ] = zero_weight_node;
return;
}
#endif
syntax highlighted by Code2HTML, v. 0.9.1