/****************************************************************************** * * NSSDC/CDF Huffman compression/decompression. * * Version 1.0b, 2-Sep-97, Hughes STX. * * Modification history: * * V1.0 22-Jul-96, J Love Original version...actually a heavily modified * version of algorithms originally written by ?. * V1.0a 28-Feb-97, J Love Windows NT for MS Visual C/C++ on an IBM PC. * V1.0b 2-Sep-97, J Love Type casting for ANSI C. * ******************************************************************************/ #include "cdflib.h" #if (!SUPPORT_HUFF || !SUPPORT_AHUFF) && defined(BORLANDC) #pragma warn -par #endif /****************************************************************************** * Macros/prototypes. ******************************************************************************/ typedef uByte BYTE; typedef uInt16 WORD; typedef uInt32 DWORD; typedef uInt INT; /* Needed to pass a WORD by value. */ /* * The NODE structure is a node in the Huffman decoding tree. It has a * count, which is its weight in the tree, and the node numbers of its * two children. The saved_count member of the structure is only * there for debugging purposes, and can be safely taken out at any * time. It just holds the intial count for each of the symbols, since * the count member is continually being modified as the tree grows. */ typedef struct tree_node { WORD count; WORD saved_count; int child_0; int child_1; } NODE; /* * A Huffman tree is set up for decoding, not encoding. When encoding, * I first walk through the tree and build up a table of codes for * each symbol. The codes are stored in this CODE structure. */ typedef struct code { WORD code; int code_bits; } CODE; /* * The special EOS symbol is 256, the first available symbol after all * of the possible bytes. When decoding, reading this symbols * indicates that all of the data has been read in. */ #define END_OF_STREAM 256 typedef struct bit_file { vFILE *file; BYTE mask; int rack; } BIT_FILE; #define END_OF_STREAM 256 #define ESCAPE 257 #define SYMBOL_COUNT 258 #define NODE_TABLE_COUNT ((SYMBOL_COUNT*2)-1) #define ROOT_NODE 0 #define MAX_WEIGHT 0x8000 /* * This data structure is all that is needed to maintain an adaptive * Huffman tree for both encoding and decoding. The leaf array is a * set of indices into the nodes that indicate which node is the * parent of a symbol. For example, to encode 'A', we would find the * leaf node by way of leaf[ 'A' ]. The next_free_node index is used * to tell which node is the next one in the array that can be used. * Since nodes are allocated when characters are read in for the first * time, this pointer keeps track of where we are in the node array. * Finally, the array of nodes is the actual Huffman tree. The child * index is either an index pointing to a pair of children, or an * actual symbol value, depending on whether 'child_is_leaf' is true * or false. */ typedef struct tree { int leaf[ SYMBOL_COUNT ]; int next_free_node; struct node { WORD weight; int parent; int child_is_leaf; int child; } nodes[ NODE_TABLE_COUNT ]; } TREE; #if SUPPORT_HUFF || SUPPORT_AHUFF static BIT_FILE *StartBitFile PROTOARGs((vFILE *fp)); static Logical EndOutputBitFile PROTOARGs((BIT_FILE *bit_file)); static Logical EndInputBitFile PROTOARGs((BIT_FILE *bit_file)); static Logical OutputBits PROTOARGs(( BIT_FILE *bit_file, DWORD code, int count )); static int InputBit PROTOARGs((BIT_FILE *bit_file)); #endif #if SUPPORT_HUFF static Logical output_counts PROTOARGs((BIT_FILE *output, NODE *nodes)); static Logical count_bytes PROTOARGs(( vFILE *input, DWORD *counts, Int32 iSize )); static void scale_counts PROTOARGs((DWORD *counts, NODE *nodes)); static int build_tree PROTOARGs((NODE *nodes)); static void convert_tree_to_code PROTOARGs(( NODE *nodes, CODE *codes, INT code_so_far, int bits, int node )); static CDFstatus compress_data PROTOARGs(( vFILE *input, BIT_FILE *output, CODE *codes, Int32 iSize, CDFstatus iError, CDFstatus oError )); static Logical input_counts PROTOARGs((BIT_FILE *input, NODE *nodes)); static CDFstatus expand_data PROTOARGs(( BIT_FILE *input, vFILE *output, NODE *nodes, int root_node, CDFstatus iError, CDFstatus oError )); #endif #if SUPPORT_AHUFF static void InitializeTree PROTOARGs((TREE *tree)); static Logical EncodeSymbol PROTOARGs((TREE *tree, INT c, BIT_FILE *output)); static int DecodeSymbol PROTOARGs((TREE *tree, BIT_FILE *input)); static void UpdateModel PROTOARGs((TREE *tree, int c)); static void RebuildTree PROTOARGs((TREE *tree)); static void swap_nodes PROTOARGs((TREE *tree, int i, int j)); static void add_new_node PROTOARGs((TREE *tree, int c)); static DWORD InputBits PROTOARGs((BIT_FILE *bit_file, int bit_count)); #endif /****************************************************************************** * CompressHUFF0. ******************************************************************************/ STATICforIDL CDFstatus CompressHUFF0 (input, iOffset, iSize, iError, oFp, oOffset, oSize, oError) vFILE *input; Int32 iOffset; Int32 iSize; CDFstatus iError; vFILE *oFp; Int32 oOffset; Int32 *oSize; CDFstatus oError; { #if SUPPORT_HUFF BIT_FILE *output; DWORD *counts; NODE *nodes; CODE *codes; CDFstatus pStatus = CDF_OK; int root_node; long newOffset; if (!SEEKv(input,(long)iOffset,vSEEK_SET)) return iError; if (!SEEKv(oFp,(long)oOffset,vSEEK_SET)) return oError; output = StartBitFile (oFp); if (output == NULL) return BAD_MALLOC; *oSize = 0; counts = (DWORD *) CallocateMemory (256, sizeof(DWORD), NULL); if (counts == NULL) { cdf_FreeMemory (output, NULL); return BAD_MALLOC; } nodes = (NODE *) CallocateMemory (514, sizeof(NODE), NULL); if (nodes == NULL) { cdf_FreeMemory (counts, NULL); cdf_FreeMemory (output, NULL); return BAD_MALLOC; } codes = (CODE *) CallocateMemory (257, sizeof(CODE), NULL); if (codes == NULL) { cdf_FreeMemory (nodes, NULL); cdf_FreeMemory (counts, NULL); cdf_FreeMemory (output, NULL); return BAD_MALLOC; } if (!count_bytes(input,counts,iSize)) { cdf_FreeMemory (codes, NULL); cdf_FreeMemory (nodes, NULL); cdf_FreeMemory (counts, NULL); cdf_FreeMemory (output, NULL); return iError; } scale_counts (counts, nodes); if (!output_counts(output,nodes)) { cdf_FreeMemory (codes, NULL); cdf_FreeMemory (nodes, NULL); cdf_FreeMemory (counts, NULL); cdf_FreeMemory (output, NULL); return oError; } root_node = build_tree (nodes); convert_tree_to_code (nodes, codes, (INT) 0, 0, root_node); if (!sX(compress_data(input,output,codes,iSize,iError,oError),&pStatus)) { cdf_FreeMemory (codes, NULL); cdf_FreeMemory (nodes, NULL); cdf_FreeMemory (counts, NULL); cdf_FreeMemory (output, NULL); return pStatus; } if (!EndOutputBitFile(output)) { cdf_FreeMemory (codes, NULL); cdf_FreeMemory (nodes, NULL); cdf_FreeMemory (counts, NULL); return oError; } newOffset = V_tell (oFp); if (newOffset == EOF) { cdf_FreeMemory (codes, NULL); cdf_FreeMemory (nodes, NULL); cdf_FreeMemory (counts, NULL); return oError; } *oSize = newOffset - oOffset; cdf_FreeMemory (codes, NULL); cdf_FreeMemory (nodes, NULL); cdf_FreeMemory (counts, NULL); return pStatus; #else return UNKNOWN_COMPRESSION; #endif } /****************************************************************************** * DecompressHUFF0. ******************************************************************************/ STATICforIDL CDFstatus DecompressHUFF0 (iFp, iOffset, iError, output, oOffset, oError) vFILE *iFp; Int32 iOffset; CDFstatus iError; vFILE *output; Int32 oOffset; CDFstatus oError; { #if SUPPORT_HUFF CDFstatus pStatus = CDF_OK; BIT_FILE *input; NODE *nodes; int root_node; if (!SEEKv(iFp,(long)iOffset,vSEEK_SET)) return iError; if (!SEEKv(output,(long)oOffset,vSEEK_SET)) return oError; input = StartBitFile (iFp); if (input == NULL) return BAD_MALLOC; nodes = (NODE *) CallocateMemory (514, sizeof(NODE), NULL); if (nodes == NULL) { cdf_FreeMemory (input, NULL); return BAD_MALLOC; } if (!input_counts(input,nodes)) { cdf_FreeMemory (nodes, NULL); cdf_FreeMemory (input, NULL); return iError; } root_node = build_tree( nodes ); if (!sX(expand_data(input,output,nodes,root_node,iError,oError),&pStatus)) { cdf_FreeMemory (nodes, NULL); cdf_FreeMemory (input, NULL); return pStatus; } if (!EndInputBitFile(input)) { cdf_FreeMemory (nodes, NULL); return iError; } cdf_FreeMemory (nodes, NULL); return pStatus; #else return UNKNOWN_COMPRESSION; #endif } /****************************************************************************** * CompressAHUFF0. ******************************************************************************/ /* * The high level view of the compression routine is very simple. * First, we initialize the Huffman tree, with just the ESCAPE and * END_OF_STREAM symbols. Then, we sit in a loop, encoding symbols, * and adding them to the model. When there are no more characters * to send, the special END_OF_STREAM symbol is encoded. The decoder * will later be able to use this symbol to know when to quit. */ STATICforIDL CDFstatus CompressAHUFF0 (input, iOffset, iSize, iError, oFp, oOffset, oSize, oError) vFILE *input; Int32 iOffset; Int32 iSize; CDFstatus iError; vFILE *oFp; Int32 oOffset; Int32 *oSize; CDFstatus oError; { #if SUPPORT_AHUFF CDFstatus pStatus = CDF_OK; BIT_FILE *output; TREE *Tree; int c; Int32 i; long newOffset; if (!SEEKv(input,(long)iOffset,vSEEK_SET)) return iError; if (!SEEKv(oFp,(long)oOffset,vSEEK_SET)) return oError; output = StartBitFile (oFp); if (output == NULL) return BAD_MALLOC; *oSize = 0; Tree = (TREE *) CallocateMemory (1, sizeof(TREE), NULL); if (Tree == NULL) { cdf_FreeMemory (output, NULL); return BAD_MALLOC; } InitializeTree (Tree); for (i = 0; i < iSize; i++) { if ((c = V_getc(input)) == EOF) { cdf_FreeMemory (Tree, NULL); cdf_FreeMemory (output, NULL); return iError; } if (!EncodeSymbol(Tree,(INT)c,output)) { cdf_FreeMemory (Tree, NULL); cdf_FreeMemory (output, NULL); return oError; } UpdateModel (Tree, c); } if (!EncodeSymbol(Tree,(INT)END_OF_STREAM,output)) { cdf_FreeMemory (Tree, NULL); cdf_FreeMemory (output, NULL); return oError; } if (!EndOutputBitFile(output)) { cdf_FreeMemory (Tree, NULL); return oError; } newOffset = V_tell (oFp); if (newOffset == EOF) { cdf_FreeMemory (Tree, NULL); return oError; } *oSize = newOffset - oOffset; cdf_FreeMemory (Tree, NULL); return pStatus; #else return UNKNOWN_COMPRESSION; #endif } /****************************************************************************** * DecompressAHUFF0. ******************************************************************************/ STATICforIDL CDFstatus DecompressAHUFF0 (iFp, iOffset, iError, output, oOffset, oError) vFILE *iFp; Int32 iOffset; CDFstatus iError; vFILE *output; Int32 oOffset; CDFstatus oError; { #if SUPPORT_AHUFF CDFstatus pStatus = CDF_OK; BIT_FILE *input; TREE *Tree; int c; if (!SEEKv(iFp,(long)iOffset,vSEEK_SET)) return iError; if (!SEEKv(output,(long)oOffset,vSEEK_SET)) return oError; input = StartBitFile (iFp); if (input == NULL) return BAD_MALLOC; Tree = (TREE *) CallocateMemory (1, sizeof(TREE), NULL); if (Tree == NULL) { cdf_FreeMemory (input, NULL); return BAD_MALLOC; } InitializeTree (Tree); while ((c = DecodeSymbol(Tree,input)) != END_OF_STREAM) { if (c == EOF) { cdf_FreeMemory (Tree, NULL); cdf_FreeMemory (input, NULL); return iError; } if (V_putc(c,output) == EOF) { cdf_FreeMemory (Tree, NULL); cdf_FreeMemory (input, NULL); return oError; } UpdateModel (Tree, c); } if (!EndInputBitFile(input)) { cdf_FreeMemory (Tree, NULL); return iError; } cdf_FreeMemory (Tree, NULL); return pStatus; #else return UNKNOWN_COMPRESSION; #endif } #if SUPPORT_HUFF /****************************************************************************** * output_counts. ******************************************************************************/ /* * In order for the compressor to build the same model, I have to store * the symbol counts in the compressed file so the expander can read * them in. In order to save space, I don't save all 256 symbols * unconditionally. The format used to store counts looks like this: * * start, stop, counts, start, stop, counts, ... 0 * * This means that I store runs of counts, until all the non-zero * counts have been stored. At this time the list is terminated by * storing a start value of 0. Note that at least 1 run of counts has * to be stored, so even if the first start value is 0, I read it in. * It also means that even in an empty file that has no counts, I have * to pass at least one count. * * In order to efficiently use this format, I have to identify runs of * non-zero counts. Because of the format used, I don't want to stop a * run because of just one or two zeros in the count stream. So I have * to sit in a loop looking for strings of three or more zero values in * a row. * * This is simple in concept, but it ends up being one of the most * complicated routines in the whole program. A routine that just * writes out 256 values without attempting to optimize would be much * simpler, but would hurt compression quite a bit on small files. * */ static Logical output_counts (output, nodes) BIT_FILE *output; NODE *nodes; { int first; int last; int next; int i; first = 0; while ( first < 255 && nodes[ first ].count == 0 ) first++; /* * Each time I hit the start of the loop, I assume that first is the * number for a run of non-zero values. The rest of the loop is * concerned with finding the value for last, which is the end of the * run, and the value of next, which is the start of the next run. * At the end of the loop, I assign next to first, so it starts in on * the next run. */ for ( ; first < 256 ; first = next ) { last = first + 1; for ( ; ; ) { for ( ; last < 256 ; last++ ) if ( nodes[ last ].count == 0 ) break; last--; for ( next = last + 1; next < 256 ; next++ ) if ( nodes[ next ].count != 0 ) break; if ( next > 255 ) break; if ( ( next - last ) > 3 ) break; last = next; }; /* * Here is where I output first, last, and all the counts in between. */ if (V_putc(first,output->file) != first) return FALSE; if (V_putc(last,output->file) != last) return FALSE; for ( i = first ; i <= last ; i++ ) { if (V_putc((int) nodes[ i ].count, output->file ) != (int) nodes[ i ].count ) return FALSE; } } if (V_putc(0,output->file) != 0) return FALSE; return TRUE; } #endif #if SUPPORT_HUFF /****************************************************************************** * count_bytes. ******************************************************************************/ /* * This routine counts the frequency of occurence of every byte in * the input file. It marks the place in the input stream where it * started, counts up all the bytes, then returns to the place where * it started. In most C implementations, the length of a file * cannot exceed an unsigned long, so this routine should always * work. */ static Logical count_bytes( input, counts, iSize ) vFILE *input; DWORD *counts; Int32 iSize; { long input_marker; int c; Int32 i; input_marker = V_tell (input); if (input_marker == EOF) return FALSE; for (i = 0; i < iSize; i++) { if ((c = V_getc(input)) == EOF) return FALSE; counts[c]++; } if (!SEEKv(input,input_marker,vSEEK_SET)) return FALSE; return TRUE; } #endif #if SUPPORT_HUFF /****************************************************************************** * scale_counts. ******************************************************************************/ /* * In order to limit the size of my Huffman codes to 16 bits, I scale * my counts down so they fit in an unsigned char, and then store them * all as initial weights in my NODE array. The only thing to be * careful of is to make sure that a node with a non-zero count doesn't * get scaled down to 0. Nodes with values of 0 don't get codes. */ static void scale_counts( counts, nodes ) DWORD *counts; NODE *nodes; { DWORD max_count; int i; max_count = 0; for ( i = 0 ; i < 256 ; i++ ) if ( counts[ i ] > max_count ) max_count = counts[ i ]; if ( max_count == 0 ) { counts[ 0 ] = 1; max_count = 1; } max_count = max_count / 255; max_count = max_count + 1; for ( i = 0 ; i < 256 ; i++ ) { nodes[ i ].count = (WORD) ( counts[ i ] / max_count ); if ( nodes[ i ].count == 0 && counts[ i ] != 0 ) nodes[ i ].count = 1; } nodes[ END_OF_STREAM ].count = 1; } #endif #if SUPPORT_HUFF /****************************************************************************** * build_tree. ******************************************************************************/ /* * Building the Huffman tree is fairly simple. All of the active nodes * are scanned in order to locate the two nodes with the minimum * weights. These two weights are added together and assigned to a new * node. The new node makes the two minimum nodes into its 0 child * and 1 child. The two minimum nodes are then marked as inactive. * This process repeats until their is only one node left, which is the * root node. The tree is done, and the root node is passed back * to the calling routine. * * Node 513 is used here to arbitratily provide a node with a guaranteed * maximum value. It starts off being min_1 and min_2. After all active * nodes have been scanned, I can tell if there is only one active node * left by checking to see if min_1 is still 513. */ static int build_tree( nodes ) NODE *nodes; { int next_free; int i; int min_1; int min_2; nodes[ 513 ].count = 0xffff; for ( next_free = END_OF_STREAM + 1 ; ; next_free++ ) { min_1 = 513; min_2 = 513; for ( i = 0 ; i < next_free ; i++ ) if ( nodes[ i ].count != 0 ) { if ( nodes[ i ].count < nodes[ min_1 ].count ) { min_2 = min_1; min_1 = i; } else if ( nodes[ i ].count < nodes[ min_2 ].count ) min_2 = i; } if ( min_2 == 513 ) break; nodes[ next_free ].count = nodes[ min_1 ].count + nodes[ min_2 ].count; nodes[ min_1 ].saved_count = nodes[ min_1 ].count; nodes[ min_1 ].count = 0; nodes[ min_2 ].saved_count = nodes[ min_2 ].count; nodes[ min_2 ].count = 0; nodes[ next_free ].child_0 = min_1; nodes[ next_free ].child_1 = min_2; } next_free--; nodes[ next_free ].saved_count = nodes[ next_free ].count; return( next_free ); } #endif #if SUPPORT_HUFF /****************************************************************************** * convert_tree_to_code. ******************************************************************************/ /* * Since the Huffman tree is built as a decoding tree, there is * no simple way to get the encoding values for each symbol out of * it. This routine recursively walks through the tree, adding the * child bits to each code until it gets to a leaf. When it gets * to a leaf, it stores the code value in the CODE element, and * returns. */ static void convert_tree_to_code( nodes, codes, code_so_far, bits, node ) NODE *nodes; CODE *codes; INT code_so_far; int bits; int node; { if (node <= END_OF_STREAM) { codes[node].code = (WORD) code_so_far; codes[node].code_bits = bits; return; } code_so_far <<= 1; bits++; convert_tree_to_code (nodes, codes, code_so_far, bits, nodes[node].child_0 ); convert_tree_to_code (nodes, codes, (INT) (code_so_far | 1), bits, nodes[node].child_1 ); return; } #endif #if SUPPORT_HUFF /****************************************************************************** * compress_data. ******************************************************************************/ /* * Once the tree gets built, and the CODE table is built, compressing * the data is a breeze. Each byte is read in, and its corresponding * Huffman code is sent out. */ static CDFstatus compress_data( input, output, codes, iSize, iError, oError) vFILE *input; BIT_FILE *output; CODE *codes; Int32 iSize; CDFstatus iError; CDFstatus oError; { int c; long i; for (i = 0; i < iSize; i++) { if ((c = V_getc(input)) == EOF) return iError; if (!OutputBits(output,(DWORD)codes[c].code,codes[ c ].code_bits)) { return oError; } } if (!OutputBits(output,(DWORD)codes[END_OF_STREAM].code, codes[END_OF_STREAM].code_bits)) return oError; return CDF_OK; } #endif #if SUPPORT_HUFF || SUPPORT_AHUFF /****************************************************************************** * StartBitFile. ******************************************************************************/ static BIT_FILE *StartBitFile (fp) vFILE *fp; { BIT_FILE *bit_file; bit_file = (BIT_FILE *) CallocateMemory (1, sizeof(BIT_FILE), NULL); if (bit_file == NULL) return NULL; bit_file->file = fp; bit_file->rack = 0; bit_file->mask = 0x80; return bit_file; } #endif #if SUPPORT_HUFF || SUPPORT_AHUFF /****************************************************************************** * EndOutputBitFile. ******************************************************************************/ static Logical EndOutputBitFile (bit_file) BIT_FILE *bit_file; { Logical status = TRUE; if (bit_file->mask != 0x80) { if (V_putc(bit_file->rack, bit_file->file) != bit_file->rack) status = FALSE; } cdf_FreeMemory (bit_file, NULL); return status; } #endif #if SUPPORT_HUFF || SUPPORT_AHUFF /****************************************************************************** * EndInputBitFile. ******************************************************************************/ static Logical EndInputBitFile (bit_file) BIT_FILE *bit_file; { cdf_FreeMemory (bit_file, NULL); return TRUE; } #endif #if SUPPORT_HUFF || SUPPORT_AHUFF /****************************************************************************** * OutputBits. ******************************************************************************/ static Logical OutputBits( bit_file, code, count ) BIT_FILE *bit_file; DWORD code; int count; { DWORD mask; mask = 1L << ( count - 1 ); while ( mask != 0) { if ( mask & code ) bit_file->rack |= bit_file->mask; bit_file->mask >>= 1; if ( bit_file->mask == 0 ) { if (V_putc( bit_file->rack, bit_file->file ) != bit_file->rack ) { return FALSE; } bit_file->rack = 0; bit_file->mask = 0x80; } mask >>= 1; } return TRUE; } #endif #if SUPPORT_HUFF || SUPPORT_AHUFF /****************************************************************************** * InputBit. ******************************************************************************/ static int InputBit (bit_file) BIT_FILE *bit_file; { int value; if ( bit_file->mask == 0x80 ) { bit_file->rack = V_getc( bit_file->file ); if ( bit_file->rack == EOF ) return EOF; } value = bit_file->rack & bit_file->mask; bit_file->mask >>= 1; if ( bit_file->mask == 0 ) bit_file->mask = 0x80; return ( value ? 1 : 0 ); } #endif #if SUPPORT_AHUFF /****************************************************************************** * InputBits. ******************************************************************************/ static DWORD InputBits( bit_file, bit_count ) BIT_FILE *bit_file; int bit_count; { DWORD mask; DWORD return_value; mask = 1L << ( bit_count - 1 ); return_value = 0; while ( mask != 0) { if ( bit_file->mask == 0x80 ) { bit_file->rack = V_getc( bit_file->file ); if ( bit_file->rack == EOF ) return ((DWORD) EOF); } if ( bit_file->rack & bit_file->mask ) return_value |= mask; mask >>= 1; bit_file->mask >>= 1; if ( bit_file->mask == 0 ) bit_file->mask = 0x80; } return( return_value ); } #endif #if SUPPORT_HUFF /****************************************************************************** * input_counts. ******************************************************************************/ /* * When expanding, I have to read in the same set of counts. This is * quite a bit easier that the process of writing them out, since no * decision making needs to be done. All I do is read in first, check * to see if I am all done, and if not, read in last and a string of * counts. */ static Logical input_counts( input, nodes ) BIT_FILE *input; NODE *nodes; { int first; int last; int i; int c; for ( i = 0 ; i < 256 ; i++ ) nodes[ i ].count = 0; if ( ( first = V_getc( input->file ) ) == EOF ) return FALSE; if ( ( last = V_getc( input->file ) ) == EOF ) return FALSE; for ( ; ; ) { for ( i = first ; i <= last ; i++ ) { if ( ( c = V_getc( input->file ) ) == EOF ) return FALSE; nodes[ i ].count = (WORD) c; } if ( ( first = V_getc( input->file ) ) == EOF ) return FALSE; if ( first == 0 ) break; if ( ( last = V_getc( input->file ) ) == EOF ) return FALSE; } nodes[ END_OF_STREAM ].count = 1; return TRUE; } #endif #if SUPPORT_HUFF /****************************************************************************** * expand_data. ******************************************************************************/ /* * Expanding compressed data is a little harder than the compression * phase. As each new symbol is decoded, the tree is traversed, * starting at the root node, reading a bit in, and taking either the * child_0 or child_1 path. Eventually, the tree winds down to a * leaf node, and the corresponding symbol is output. If the symbol * is the END_OF_STREAM symbol, it doesn't get written out, and * instead the whole process terminates. */ static CDFstatus expand_data( input, output, nodes, root_node, iError, oError) BIT_FILE *input; vFILE *output; NODE *nodes; int root_node; CDFstatus iError; CDFstatus oError; { int node; for ( ; ; ) { node = root_node; do { int bit = InputBit (input); if (bit == EOF) return iError; if (bit) node = nodes[ node ].child_1; else node = nodes[ node ].child_0; } while ( node > END_OF_STREAM ); if ( node == END_OF_STREAM ) break; if ( ( V_putc( node, output ) ) != node ) return oError; } return CDF_OK; } #endif #if SUPPORT_AHUFF /****************************************************************************** * InitializeTree. ******************************************************************************/ /* * When performing adaptive compression, the Huffman tree starts out * very nearly empty. The only two symbols present initially are the * ESCAPE symbol and the END_OF_STREAM symbol. The ESCAPE symbol has to * be included so we can tell the expansion prog that we are transmitting a * previously unseen symbol. The END_OF_STREAM symbol is here because * it is greater than eight bits, and our ESCAPE sequence only allows for * eight bit symbols following the ESCAPE code. * * In addition to setting up the root node and its two children, this * routine also initializes the leaf array. The ESCAPE and END_OF_STREAM * leaf elements are the only ones initially defined, the rest of the leaf * elements are set to -1 to show that they aren't present in the * Huffman tree yet. */ static void InitializeTree( tree ) TREE *tree; { int i; tree->nodes[ ROOT_NODE ].child = ROOT_NODE + 1; tree->nodes[ ROOT_NODE ].child_is_leaf = FALSE; tree->nodes[ ROOT_NODE ].weight = 2; tree->nodes[ ROOT_NODE ].parent = -1; tree->nodes[ ROOT_NODE + 1 ].child = END_OF_STREAM; tree->nodes[ ROOT_NODE + 1 ].child_is_leaf = TRUE; tree->nodes[ ROOT_NODE + 1 ].weight = 1; tree->nodes[ ROOT_NODE + 1 ].parent = ROOT_NODE; tree->leaf[ END_OF_STREAM ] = ROOT_NODE + 1; tree->nodes[ ROOT_NODE + 2 ].child = ESCAPE; tree->nodes[ ROOT_NODE + 2 ].child_is_leaf = TRUE; tree->nodes[ ROOT_NODE + 2 ].weight = 1; tree->nodes[ ROOT_NODE + 2 ].parent = ROOT_NODE; tree->leaf[ ESCAPE ] = ROOT_NODE + 2; tree->next_free_node = ROOT_NODE + 3; for ( i = 0 ; i < END_OF_STREAM ; i++ ) tree->leaf[ i ] = -1; } #endif #if SUPPORT_AHUFF /****************************************************************************** * EncodeSymbol. ******************************************************************************/ /* * This routine is responsible for taking a symbol, and converting * it into the sequence of bits dictated by the Huffman tree. The * only complication is that we are working are way up from the leaf * to the root, and hence are getting the bits in reverse order. This * means we have to rack up the bits in an integer and then send them * out after they are all accumulated. In this version of the program, * we keep our codes in a long integer, so the maximum count is set * to an arbitray limit of 0x8000. It could be set as high as 65535 * if desired. */ static Logical EncodeSymbol( tree, c, output ) TREE *tree; INT c; BIT_FILE *output; { DWORD code; DWORD current_bit; int code_size; int current_node; code = 0; current_bit = 1; code_size = 0; current_node = tree->leaf[(int)c]; if ( current_node == -1 ) current_node = tree->leaf[ ESCAPE ]; while ( current_node != ROOT_NODE ) { if ( ( current_node & 1 ) == 0 ) code |= current_bit; current_bit <<= 1; code_size++; current_node = tree->nodes[ current_node ].parent; }; if (!OutputBits(output,code,code_size)) return FALSE; if ( tree->leaf[(int)c] == -1 ) { if (!OutputBits(output,(DWORD)c,8)) return FALSE; add_new_node( tree, (int) c ); } return TRUE; } #endif #if SUPPORT_AHUFF /****************************************************************************** * DecodeSymbol. ******************************************************************************/ /* * Decoding symbols is easy. We start at the root node, then go down * the tree until we reach a leaf. At each node, we decide which * child to take based on the next input bit. After getting to the * leaf, we check to see if we read in the ESCAPE code. If we did, * it means that the next symbol is going to come through in the next * eight bits, unencoded. If that is the case, we read it in here, * and add the new symbol to the table. */ static int DecodeSymbol (tree, input) TREE *tree; BIT_FILE *input; { int current_node; int c; int bit; current_node = ROOT_NODE; while ( !tree->nodes[ current_node ].child_is_leaf ) { current_node = tree->nodes[ current_node ].child; bit = InputBit (input); if (bit == EOF) return EOF; current_node += bit; } c = tree->nodes[ current_node ].child; if ( c == ESCAPE ) { c = (int) InputBits( input, 8 ); if (c == EOF) return EOF; add_new_node( tree, c ); } return( c ); } #endif #if SUPPORT_AHUFF /****************************************************************************** * UpdateModel. ******************************************************************************/ /* * UpdateModel is called to increment the count for a given symbol. * After incrementing the symbol, this code has to work its way up * through the parent nodes, incrementing each one of them. That is * the easy part. The hard part is that after incrementing each * parent node, we have to check to see if it is now out of the proper * order. If it is, it has to be moved up the tree into its proper * place. */ static void UpdateModel( tree, c ) TREE *tree; int c; { int current_node; int new_node; if ( tree->nodes[ ROOT_NODE].weight == MAX_WEIGHT ) RebuildTree( tree ); current_node = tree->leaf[ c ]; while ( current_node != -1 ) { tree->nodes[ current_node ].weight++; for ( new_node = current_node ; new_node > ROOT_NODE ; new_node-- ) if ( tree->nodes[ new_node - 1 ].weight >= tree->nodes[ current_node ].weight ) break; if ( current_node != new_node ) { swap_nodes( tree, current_node, new_node ); current_node = new_node; } current_node = tree->nodes[ current_node ].parent; } return; } #endif #if SUPPORT_AHUFF /****************************************************************************** * RebuildTree. ******************************************************************************/ /* * Rebuilding the tree takes place when the counts have gone too * high. From a simple point of view, rebuilding the tree just means that * we divide every count by two. Unfortunately, due to truncation effects, * this means that the tree's shape might change. Some nodes might move * up due to cumulative increases, while others may move down. */ static void RebuildTree( tree ) TREE *tree; { int i; int j; int k; WORD weight; /* * To start rebuilding the table, I collect all the leaves of the Huffman * tree and put them in the end of the tree. While I am doing that, I * scale the counts down by a factor of 2. */ j = tree->next_free_node - 1; for ( i = j ; i >= ROOT_NODE ; i-- ) { if ( tree->nodes[ i ].child_is_leaf ) { tree->nodes[ j ] = tree->nodes[ i ]; tree->nodes[ j ].weight = (tree->nodes[j].weight + 1) / ((WORD) 2); j--; } } /* * At this point, j points to the first free node. I now have all the * leaves defined, and need to start building the higher nodes on the * tree. I will start adding the new internal nodes at j. Every time * I add a new internal node to the top of the tree, I have to check to * see where it really belongs in the tree. It might stay at the top, * but there is a good chance I might have to move it back down. If it * does have to go down, I use the memmove() function to scoot everyone * bigger up by one node. Note that memmove() may have to be change * to memcpy() on some UNIX systems. The parameters are unchanged, as * memmove and memcpy have the same set of parameters. */ for ( i = tree->next_free_node - 2 ; j >= ROOT_NODE ; i -= 2, j-- ) { k = i + 1; tree->nodes[ j ].weight = tree->nodes[ i ].weight + tree->nodes[ k ].weight; weight = tree->nodes[ j ].weight; tree->nodes[ j ].child_is_leaf = FALSE; for ( k = j + 1 ; weight < tree->nodes[ k ].weight ; k++ ) ; k--; memmove( &tree->nodes[ j ], &tree->nodes[ j + 1 ], ( k - j ) * sizeof( struct node ) ); tree->nodes[ k ].weight = weight; tree->nodes[ k ].child = i; tree->nodes[ k ].child_is_leaf = FALSE; } /* * The final step in tree reconstruction is to go through and set up * all of the leaf and parent members. This can be safely done now * that every node is in its final position in the tree. */ for ( i = tree->next_free_node - 1 ; i >= ROOT_NODE ; i-- ) { if ( tree->nodes[ i ].child_is_leaf ) { k = tree->nodes[ i ].child; tree->leaf[ k ] = i; } else { k = tree->nodes[ i ].child; tree->nodes[ k ].parent = tree->nodes[ k + 1 ].parent = i; } } return; } #endif #if SUPPORT_AHUFF /****************************************************************************** * swap_nodes. ******************************************************************************/ /* * Swapping nodes takes place when a node has grown too big for its * spot in the tree. When swapping nodes i and j, we rearrange the * tree by exchanging the children under i with the children under j. */ static void swap_nodes( tree, i, j ) TREE *tree; int i; int j; { struct node temp; if ( tree->nodes[ i ].child_is_leaf ) tree->leaf[ tree->nodes[ i ].child ] = j; else { tree->nodes[ tree->nodes[ i ].child ].parent = j; tree->nodes[ tree->nodes[ i ].child + 1 ].parent = j; } if ( tree->nodes[ j ].child_is_leaf ) tree->leaf[ tree->nodes[ j ].child ] = i; else { tree->nodes[ tree->nodes[ j ].child ].parent = i; tree->nodes[ tree->nodes[ j ].child + 1 ].parent = i; } temp = tree->nodes[ i ]; tree->nodes[ i ] = tree->nodes[ j ]; tree->nodes[ i ].parent = temp.parent; temp.parent = tree->nodes[ j ].parent; tree->nodes[ j ] = temp; return; } #endif #if SUPPORT_AHUFF /****************************************************************************** * add_new_node. ******************************************************************************/ /* * Adding a new node to the tree is pretty simple. It is just a matter * of splitting the lightest-weight node in the tree, which is the highest * valued node. We split it off into two new nodes, one of which is the * one being added to the tree. We assign the new node a weight of 0, * so the tree doesn't have to be adjusted. It will be updated later when * the normal update process occurs. Note that this code assumes that * the lightest node has a leaf as a child. If this is not the case, * the tree would be broken. */ static void add_new_node( tree, c ) TREE *tree; int c; { int lightest_node; int new_node; int zero_weight_node; lightest_node = tree->next_free_node - 1; new_node = tree->next_free_node; zero_weight_node = tree->next_free_node + 1; tree->next_free_node += 2; tree->nodes[ new_node ] = tree->nodes[ lightest_node ]; tree->nodes[ new_node ].parent = lightest_node; tree->leaf[ tree->nodes[ new_node ].child ] = new_node; tree->nodes[ lightest_node ].child = new_node; tree->nodes[ lightest_node ].child_is_leaf = FALSE; tree->nodes[ zero_weight_node ].child = c; tree->nodes[ zero_weight_node ].child_is_leaf = TRUE; tree->nodes[ zero_weight_node ].weight = 0; tree->nodes[ zero_weight_node ].parent = lightest_node; tree->leaf[ c ] = zero_weight_node; return; } #endif