/* ** Copyright (C) 2004-2007 by Carnegie Mellon University. ** ** @OPENSOURCE_HEADER_START@ ** ** Use of the SILK system and related source code is subject to the terms ** of the following licenses: ** ** GNU Public License (GPL) Rights pursuant to Version 2, June 1991 ** Government Purpose License Rights (GPLR) pursuant to DFARS 252.225-7013 ** ** NO WARRANTY ** ** ANY INFORMATION, MATERIALS, SERVICES, INTELLECTUAL PROPERTY OR OTHER ** PROPERTY OR RIGHTS GRANTED OR PROVIDED BY CARNEGIE MELLON UNIVERSITY ** PURSUANT TO THIS LICENSE (HEREINAFTER THE "DELIVERABLES") ARE ON AN ** "AS-IS" BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY ** KIND, EITHER EXPRESS OR IMPLIED AS TO ANY MATTER INCLUDING, BUT NOT ** LIMITED TO, WARRANTY OF FITNESS FOR A PARTICULAR PURPOSE, ** MERCHANTABILITY, INFORMATIONAL CONTENT, NONINFRINGEMENT, OR ERROR-FREE ** OPERATION. CARNEGIE MELLON UNIVERSITY SHALL NOT BE LIABLE FOR INDIRECT, ** SPECIAL OR CONSEQUENTIAL DAMAGES, SUCH AS LOSS OF PROFITS OR INABILITY ** TO USE SAID INTELLECTUAL PROPERTY, UNDER THIS LICENSE, REGARDLESS OF ** WHETHER SUCH PARTY WAS AWARE OF THE POSSIBILITY OF SUCH DAMAGES. ** LICENSEE AGREES THAT IT WILL NOT MAKE ANY WARRANTY ON BEHALF OF ** CARNEGIE MELLON UNIVERSITY, EXPRESS OR IMPLIED, TO ANY PERSON ** CONCERNING THE APPLICATION OF OR THE RESULTS TO BE OBTAINED WITH THE ** DELIVERABLES UNDER THIS LICENSE. ** ** Licensee hereby agrees to defend, indemnify, and hold harmless Carnegie ** Mellon University, its trustees, officers, employees, and agents from ** all claims or demands made against them (and any related losses, ** expenses, or attorney's fees) arising out of, or relating to Licensee's ** and/or its sub licensees' negligent use or willful misuse of or ** negligent conduct or willful misconduct regarding the Software, ** facilities, or other rights or assistance granted by Carnegie Mellon ** University under this License, including, but not limited to, any ** claims of product liability, personal injury, death, damage to ** property, or violation of any laws or regulations. ** ** Carnegie Mellon University Software Engineering Institute authored ** documents are sponsored by the U.S. Department of Defense under ** Contract F19628-00-C-0003. Carnegie Mellon University retains ** copyrights in all material produced under this contract. The U.S. ** Government retains a non-exclusive, royalty-free license to publish or ** reproduce these documents, or allow others to do so, for U.S. ** Government purposes only pursuant to the copyright license under the ** contract clause at 252.227.7013. ** ** @OPENSOURCE_HEADER_END@ */ /* ** bagtree.c ** ** Implementation of the bagtree library according to bagtree.h ** */ #include "silk.h" RCSIDENT("$SiLK: bagtree.c 7514 2007-06-14 12:20:54Z mthomas $"); #include "utils.h" #include "bagtree.h" /* LOCAL DEFINES AND TYPEDEFS */ /* Version number to write into the Bag's header */ #define RWBAG_FILE_VERSION 3 /* Definition of stats */ struct _skBag_stats { /* count of internal nodes allocated */ uint32_t nodes; /* count of leaf blocks allocated */ uint32_t leaves; /* number of bytes allocated to nodes */ uint64_t nodes_size; /* number of bytes allocated to leaves */ uint64_t leaves_size; /* count of entries inserted in the tree */ uint64_t keys_inserted; /* count of entries inserted in the tree */ uint64_t unique_keys; /* maximum counter value */ skBag_counter_t max_counter; /* minimum key inserted */ skBag_key_t min_key; /* maximum key inserted */ skBag_key_t max_key; }; /* Definition of the iterator structure */ struct _skBag_iterator { /* pointer to the bag to which this iterator was created */ const skBag_header_t *bag; /* path of offsets through the tree, bag->levels insize */ skBag_blocksize_t *offset_path; /* if true, iterator skips the first counter in the leaf block, so * that we don't re-find the same node with subsequent calls to * getNext(). */ int f_skip_counter; }; #define MIN_LEVELS 1 #define MAX_LEVELS 32 #define MIN_KEY_SIZE 1 #define MAX_KEY_SIZE 32 #define SKBAG_GET_LEVEL_SIZE(bag, level) ((bag)->level_size[(level)]) #define SKBAG_GET_LEVEL_OFFSET(bag, level) ((bag)->level_offset[(level)]) #define SKBAG_GET_KEY_BITS(key, bag, level) \ GET_MASKED_BITS((key), SKBAG_GET_LEVEL_OFFSET((bag), (level)), \ SKBAG_GET_LEVEL_SIZE((bag), (level))) #define SKBAG_GET_LEVEL_BLOCKS(bag, level) \ ((skBag_blocksize_t)(1 << SKBAG_GET_LEVEL_SIZE((bag), (level)))) #define SKBAG_COUNTER_IS_NULL(counter) \ (*(counter) == SKBAG_COUNTER_MIN) #define INCREMENT_COUNTER(counter) ((*(counter)) += 1) #define SET_COUNTER(counter, value) ((counter) = (value)) #define SKBAG_COUNTER_COPY(dst, src) ((dst) = (src)) #define SKBAG_KEY_COPY(src, dst) (*(dst) = *(src)) /* LOCAL FUNCTIONS */ static skBag_counter_t *_bagAllocToCounter( skBag_header_t *bag, const skBag_key_t *key); static skBag_counter_t *_bagGetCounterPointer( const skBag_header_t *bag, const skBag_key_t *key); static skBag_err_t _bagReadProcess( skstream_t *stream_in, skBag_header_t *bag, skBag_err_t (*bag_fn)(skBag_header_t *bag, const skBag_key_t *key, const skBag_counter_t *counter)); static skBag_err_t _bagReadHeader( skstream_t *stream_in, fileVersion_t *bag_version, int *swap_flag); static void _bagNodesFree(skBag_header_t *bag); static skBag_counter_t *_bagTraverseSubtree( skBag_iterator_t *iter, const skBag_node_t *subtree, skBag_level_t level, int f_use_iterator_start); static void _bagUpdateContentStats( skBag_header_t *bag, const skBag_key_t *key, const skBag_counter_t *counter); /* FUNCTION DEFINITIONS */ /* create a bag */ skBag_err_t skBag_alloc( skBag_header_t **bag, skBag_level_t levels, const skBag_levelsize_t *level_sizes) { uint8_t key_size = 0; skBag_err_t rv = SKBAG_OK; skBag_header_t *result = NULL; skBag_level_t lvl; skBag_levelsize_t offset = 0; /* check inputs */ if (levels < MIN_LEVELS || levels > MAX_LEVELS) { /* bagtree must have at least one level */ return SKBAG_ERR_INPUT; } if (level_sizes == NULL) { /* must have a valid array of level sizes */ return SKBAG_ERR_INPUT; } /* each level must contain at least 1 bit of key. */ for (lvl = 0; lvl < levels; ++lvl) { if (level_sizes[lvl] < 1) { return SKBAG_ERR_INPUT; } key_size += level_sizes[lvl]; } /* the tree can encode at most a 32b key */ if (key_size < MIN_KEY_SIZE || key_size > MAX_KEY_SIZE) { return SKBAG_ERR_INPUT; } /* allocate header */ result = calloc(1, sizeof(skBag_header_t)); if (result == NULL) { rv = SKBAG_ERR_MEMORY; goto END; } /* allocate level ranges */ result->level_size = malloc(levels * sizeof(skBag_levelsize_t)); if (result->level_size == NULL) { rv = SKBAG_ERR_MEMORY; goto END; } /* allocate level offsets */ result->level_offset = malloc(levels * sizeof(skBag_levelsize_t)); if (result->level_offset == NULL) { rv = SKBAG_ERR_MEMORY; goto END; } /* allocate stats block; zero out everything except min key */ result->stats = calloc(1, sizeof(skBag_stats_t)); if (result->stats == NULL) { rv = SKBAG_ERR_MEMORY; goto END; } result->stats->min_key = SKBAG_KEY_MAX; /* memory is all allocated okay, initialize values */ result->root.child = NULL; result->levels = levels; /* * go through the level size array again, this time setting the * values of the new array. */ offset = key_size; for (lvl = 0; lvl < levels; ++lvl) { result->level_size[lvl] = level_sizes[lvl]; offset -= level_sizes[lvl]; result->level_offset[lvl] = offset; } /* set pointer and return OK */ *bag = result; END: if (rv != SKBAG_OK) { if (result) { if (result->level_size) { free(result->level_size); } if (result->level_offset) { free(result->level_offset); } if (result->stats) { free(result->stats); } free(result); } } return rv; } skBag_err_t skBag_create( skBag_header_t **bag) { const skBag_levelsize_t level_sizes[4] = { 9, 9, 9, 5 }; return skBag_alloc(bag, 4, level_sizes); } /* destroy a bag */ skBag_err_t skBag_free( skBag_header_t *bag) { if (bag == NULL) { return SKBAG_ERR_INPUT; } /* free nodes in tree */ _bagNodesFree(bag); /* free level definitions */ if (bag->level_size) { free(bag->level_size); } /* free level offsets */ if (bag->level_offset) { free(bag->level_offset); } /* free stats block */ if (bag->stats != NULL) { free(bag->stats); } /* free header */ free(bag); return SKBAG_OK; } /* get counter at 'key' */ skBag_err_t skBag_getCounter( const skBag_header_t *bag, const skBag_key_t *key, skBag_counter_t *counter) { skBag_counter_t *result; if (bag == NULL) { SKBAG_COUNTER_COPY(*counter, 0); return SKBAG_ERR_INPUT; } if (bag->root.child == NULL) { SKBAG_COUNTER_COPY(*counter, 0); return SKBAG_ERR_KEY_NOT_FOUND; } result = _bagGetCounterPointer(bag, key); if (result == NULL) { SKBAG_COUNTER_COPY(*counter, 0); return SKBAG_ERR_KEY_NOT_FOUND; } SKBAG_COUNTER_COPY(*counter, *result); return SKBAG_OK; } /* add value 'value_added' to counter for 'key'; create key if needed */ skBag_err_t skBag_addToCounter( skBag_header_t *bag, const skBag_key_t *key, const skBag_counter_t *value_added) { skBag_counter_t counter_temp; skBag_counter_t *counter; if (bag == NULL) { return SKBAG_ERR_INPUT; } if (key == NULL) { return SKBAG_ERR_INPUT; } if (value_added == NULL) { return SKBAG_ERR_INPUT; } /* get existing counter or allocate it */ counter = _bagAllocToCounter(bag, key); if (counter == NULL) { return SKBAG_ERR_MEMORY; } if (SKBAG_COUNTER_IS_NULL(counter)) { /* update new counter statistics */ bag->stats->unique_keys += 1; } /* note, this will not work when switching from uint counters to structs. */ counter_temp = *counter + *value_added; if (counter_temp < *counter) { /* counter wrapped, return error */ return SKBAG_ERR_OP_BOUNDS; } /* update statistics */ if (SKBAG_COUNTER_IS_NULL(counter)) { bag->stats->keys_inserted += 1; } *counter = counter_temp; _bagUpdateContentStats(bag, key, counter); return SKBAG_OK; } /* increment counter for 'key'; create key if needed */ skBag_err_t skBag_incrCounter( skBag_header_t *bag, const skBag_key_t *key) { skBag_counter_t *counter; if (bag == NULL) { return SKBAG_ERR_INPUT; } /* get existing counter or allocate it */ counter = _bagAllocToCounter(bag, key); if (counter == NULL) { return SKBAG_ERR_MEMORY; } if (SKBAG_COUNTER_IS_NULL(counter)) { /* update new counter statistics */ bag->stats->unique_keys += 1; } /* increment */ INCREMENT_COUNTER(counter); /* update statistics */ bag->stats->keys_inserted += 1; _bagUpdateContentStats(bag, key, counter); return SKBAG_OK; } /* set counter for 'key' to 'value'. create key if needed */ skBag_err_t skBag_setCounter( skBag_header_t *bag, const skBag_key_t *key, const skBag_counter_t *value) { skBag_counter_t *counter; if (bag == NULL) { return SKBAG_ERR_INPUT; } /* get counter or allocate it */ counter = _bagAllocToCounter(bag, key); if (counter == NULL) { return SKBAG_ERR_MEMORY; } if (SKBAG_COUNTER_IS_NULL(counter)) { /* update new counter statistics */ bag->stats->unique_keys += 1; } /* set */ SET_COUNTER(*counter, *value); /* update statistics */ bag->stats->keys_inserted += 1; _bagUpdateContentStats(bag, key, counter); return SKBAG_OK; } /* subtract 'value_sub' from counter at 'key'. key must exist */ skBag_err_t skBag_subtractFromCounter( skBag_header_t *bag, const skBag_key_t *key, const skBag_counter_t *value_sub) { skBag_counter_t counter_temp; skBag_counter_t *counter; if (bag == NULL) { return SKBAG_ERR_INPUT; } if (key == NULL) { return SKBAG_ERR_INPUT; } if (value_sub == NULL) { return SKBAG_ERR_INPUT; } /* try to get existing counter */ counter = _bagGetCounterPointer(bag, key); if (counter == NULL) { return SKBAG_ERR_OP_BOUNDS; } /* note, this will not work when switching from uint counters to structs. */ counter_temp = *counter - *value_sub; if (counter_temp > *counter) { /* counter wrapped, return error */ return SKBAG_ERR_OP_BOUNDS; } /* * update statistics (bounds may be incorrect after this * operation. a full iteration over the tree would be necessary * to recompute bounds.) */ *counter = counter_temp; if (SKBAG_COUNTER_IS_NULL(counter)) { bag->stats->keys_inserted -= 1; bag->stats->unique_keys -= 1; } return SKBAG_OK; } /* create iterator */ skBag_err_t skBag_allocIterator( const skBag_header_t *bag, skBag_iterator_t **iter) { /* check inputs */ if (bag == NULL) { return SKBAG_ERR_INPUT; } /* allocate iterator */ *iter = malloc(sizeof(skBag_iterator_t)); if (*iter == NULL) { return SKBAG_ERR_MEMORY; } /* allocate level ranges */ (*iter)->offset_path = malloc(bag->levels * sizeof(skBag_blocksize_t)); if ((*iter)->offset_path == NULL) { free(*iter); return SKBAG_ERR_MEMORY; } (*iter)->bag = bag; skBag_resetIterator(*iter); return SKBAG_OK; } /* * skBag_freeIterator * * deallocate all memory associated with a bagtree iterator * * Input: * * iter - a pointer to the iterator to free * * Output: * * a skBag_err_t error code */ skBag_err_t skBag_freeIterator( skBag_iterator_t *iter) { if (iter == NULL) { return SKBAG_ERR_INPUT; } if (iter->offset_path != NULL) { free(iter->offset_path); } free(iter); return SKBAG_OK; } /* reset the iterator */ skBag_err_t skBag_resetIterator( skBag_iterator_t *iter) { skBag_level_t lvl; if (iter == NULL) { return SKBAG_ERR_INPUT; } /* don't want to skip the first counter */ iter->f_skip_counter = 0; for (lvl = 0; lvl < iter->bag->levels; ++lvl) { iter->offset_path[lvl] = (skBag_blocksize_t)0; } return SKBAG_OK; } /* return next key/counter pair */ skBag_err_t skBag_getNext( skBag_iterator_t *iter, skBag_key_t *key, skBag_counter_t *counter) { skBag_counter_t *result; skBag_level_t lvl; /* check input */ if (iter == NULL) { return SKBAG_ERR_INPUT; } assert(iter->bag != NULL); assert(iter->offset_path != NULL); result = _bagTraverseSubtree(iter, &iter->bag->root, (skBag_level_t)0, 1); if (result == NULL) { /* there was no next key. */ return SKBAG_ERR_KEY_NOT_FOUND; } /* counter found */ SKBAG_COUNTER_COPY(*counter, *result); /* generate key value from path */ *key = 0; for (lvl = 0; lvl < iter->bag->levels; ++lvl) { *key += (iter->offset_path[lvl] << SKBAG_GET_LEVEL_OFFSET(iter->bag, lvl)); } return SKBAG_OK; } /* print statistics for the bag */ skBag_err_t skBag_printTreeStats( const skBag_header_t *bag, skstream_t *stream_out) { uint32_t total_counters; if (bag == NULL) { return SKBAG_ERR_INPUT; } if (stream_out == NULL) { return SKBAG_ERR_INPUT; } skStreamPrint(stream_out, ("%18s: %u (%" PRIu64 " bytes)\n"), "nodes allocated", bag->stats->nodes, bag->stats->nodes_size); skStreamPrint(stream_out, ("%18s: %u (%" PRIu64 " bytes)\n"), "leaves allocated", bag->stats->leaves, bag->stats->leaves_size); skStreamPrint(stream_out, ("%18s: %" PRIu64 " (%" PRIu64 " unique)\n"), "keys inserted", bag->stats->keys_inserted, bag->stats->unique_keys); total_counters = ((uint32_t)bag->stats->leaves * (uint32_t)SKBAG_GET_LEVEL_BLOCKS(bag,bag->levels-1)); skStreamPrint(stream_out, "%18s: %.02f%%\n", "counter density", (100.0 * (double)bag->stats->unique_keys / (double)total_counters)); return SKBAG_OK; } /* Write the counter_data[0..(num_keys-1)] as bag to 'stream_out' */ skBag_err_t skBag_writeArray( const skBag_counter_t *counter_data, skBag_key_t num_keys, skstream_t *stream_out) { skBag_key_t i; genericHeader file_header; int rv; if (counter_data == NULL || stream_out == NULL) { return SKBAG_ERR_INPUT; } /* output header */ if ((rv = skStreamSetSilkFormat(stream_out, FT_RWBAG)) || (rv = skStreamSetSilkVersion(stream_out, RWBAG_FILE_VERSION)) || (rv = skStreamWriteSilkHeader(stream_out, &file_header, sizeof(file_header)))) { skStreamPrintLastErr(stream_out, rv, &skAppPrintErr); return SKBAG_ERR_OUTPUT; } /* loop over the keys and print the non-zero values */ for (i = 0; i < num_keys; ++i, ++counter_data) { if (SKBAG_COUNTER_IS_NULL(counter_data)) { continue; } if (skStreamWrite(stream_out, &i, sizeof(skBag_key_t)) != sizeof(skBag_key_t)) { skStreamPrintLastErr(stream_out, -1, &skAppPrintErr); return SKBAG_ERR_OUTPUT; } if (skStreamWrite(stream_out, counter_data, sizeof(skBag_counter_t)) != sizeof(skBag_counter_t)) { skStreamPrintLastErr(stream_out, -1, &skAppPrintErr); return SKBAG_ERR_OUTPUT; } } rv = skStreamFlush(stream_out); if (rv) { skStreamPrintLastErr(stream_out, rv, &skAppPrintErr); return SKBAG_ERR_OUTPUT; } return SKBAG_OK; } /* write bag to file */ skBag_err_t skBag_writeBinary( const skBag_header_t *bag, skstream_t *stream_out) { skBag_iterator_t *iter; skBag_err_t err, err2; skBag_key_t key; skBag_counter_t counter; genericHeader file_header; int rv; if (bag == NULL) { return SKBAG_ERR_INPUT; } if (stream_out == NULL) { return SKBAG_ERR_INPUT; } err = skBag_allocIterator(bag, &iter); if (err != SKBAG_OK) { return err; } /* output header */ if ((rv = skStreamSetSilkFormat(stream_out, FT_RWBAG)) || (rv = skStreamSetSilkVersion(stream_out, RWBAG_FILE_VERSION)) || (rv = skStreamWriteSilkHeader(stream_out, &file_header, sizeof(file_header)))) { skStreamPrintLastErr(stream_out, rv, &skAppPrintErr); err = SKBAG_ERR_OUTPUT; goto END; } /* output key/counter pairs */ while ((err = skBag_getNext(iter, &key, &counter)) == SKBAG_OK) { skStreamWrite(stream_out, (void*)&key, sizeof(skBag_key_t)); skStreamWrite(stream_out, (void*)&counter, sizeof(skBag_counter_t)); } if (err == SKBAG_ERR_KEY_NOT_FOUND) { /* expected behavior. Reset 'err' */ err = SKBAG_OK; } else { skAppPrintErr("skBag_writeBinary: error looping"); } err2 = skBag_freeIterator(iter); if (err2 != SKBAG_OK) { skAppPrintErr("skBag_writeBinary: error %u freeing iterator", err2); /* Return err2 unless err already set */ if (err == SKBAG_OK) { err = err2; } } rv = skStreamFlush(stream_out); if (rv) { skStreamPrintLastErr(stream_out, rv, &skAppPrintErr); err = SKBAG_ERR_OUTPUT; goto END; } END: return err; } /* create bag and fill it with contents from file */ skBag_err_t skBag_readBinary( skBag_header_t **bag, skstream_t *stream_in) { skBag_level_t levels = (skBag_level_t) 4; skBag_levelsize_t level_size[4] = { 9, 9, 9, 5 }; skBag_err_t err; if (bag == NULL || stream_in == NULL) { return SKBAG_ERR_INPUT; } /* create bag */ err = skBag_alloc(bag, levels, level_size); if (err) { return err; } return _bagReadProcess(stream_in, *bag, &skBag_setCounter); } /* add contents of file to existing bag. increment counters for * overlapping keys. */ skBag_err_t skBag_addBinary( skBag_header_t *bag, skstream_t *stream_in) { if (bag == NULL || stream_in == NULL) { return SKBAG_ERR_INPUT; } return _bagReadProcess(stream_in, bag, &skBag_addToCounter); } const char *skBag_strerror( skBag_err_t err_code) { static char err_buf[32]; switch (err_code) { case SKBAG_OK: return "Success"; case SKBAG_ERR_MEMORY: return "Memory allocation error"; case SKBAG_ERR_KEY_NOT_FOUND: return "Key not found"; case SKBAG_ERR_INPUT: return "Illegal input"; case SKBAG_ERR_OP_BOUNDS: return "Overflow in counter"; case SKBAG_ERR_OUTPUT: return "Error writing to stream"; } snprintf(err_buf, sizeof(err_buf), "Unknown Error #%d", (int)err_code); return err_buf; } /* * * INTERNAL FUNCTIONS * */ /* * counter = _bagAllocToCounter(bag, key); * * Similar to _bagGetCounterPointer(), but will allocates nodes * along the way as necessary. Returns NULL if a node cannot be * allocated. */ static skBag_counter_t *_bagAllocToCounter( skBag_header_t *bag, const skBag_key_t *key) { skBag_node_t *subtree; skBag_level_t lvl; uint32_t key_bits; assert(bag != NULL); assert(key != NULL); subtree = &bag->root; for (lvl = 0; lvl < bag->levels - 1; ++lvl) { /* if we have are not the leaf level, allocate node */ if (subtree->child == NULL) { /* no child node exists, allocating block */ uint32_t child_count = SKBAG_GET_LEVEL_BLOCKS(bag, lvl); subtree->child = calloc(child_count, sizeof(skBag_node_t)); if (subtree->child == NULL) { return NULL; } /* update statistics */ bag->stats->nodes += 1; bag->stats->nodes_size += child_count * sizeof(skBag_node_t); } key_bits = SKBAG_GET_KEY_BITS(*key, bag, lvl); subtree = &(subtree->child[key_bits]); } /* we are currently on the last node level, our child should be a * leaf */ if (subtree->leaf == NULL) { uint32_t leaf_count = SKBAG_GET_LEVEL_BLOCKS(bag, lvl); /* actually not a skBag_node_t we are allocating */ subtree->leaf = calloc(leaf_count, sizeof(skBag_counter_t)); if (subtree->leaf == NULL) { return NULL; } /* update statistics */ bag->stats->leaves += 1; bag->stats->leaves_size += leaf_count * sizeof(skBag_counter_t); } key_bits = SKBAG_GET_KEY_BITS(*key, bag, bag->levels - 1); return &subtree->leaf[key_bits]; } /* * counter = _bagGetCounterPointer(bag, key); * * Given a bag 'bag' and a pointer to a key 'key', return a * pointer to the key's counter, or return NULL if the key does not * exist. */ static skBag_counter_t *_bagGetCounterPointer( const skBag_header_t *bag, const skBag_key_t *key) { const skBag_node_t *subtree; skBag_level_t lvl; uint32_t key_bits; assert(bag != NULL); assert(key != NULL); subtree = &(bag->root); for (lvl = 0; lvl < bag->levels - 1; ++lvl) { if (subtree->child == NULL) { return NULL; } key_bits = SKBAG_GET_KEY_BITS(*key, bag, lvl); subtree = &subtree->child[key_bits]; } if (subtree->child == NULL) { return NULL; } key_bits = SKBAG_GET_KEY_BITS(*key, bag, bag->levels - 1); return &subtree->leaf[key_bits]; } /* * status = _bagReadProcess(stream_in, bag, process_func); * * Read a binary bag file from 'stream_in', and process each * key-counter pair with the function pointed to by 'bag_fn', * passing 'bag' as the first argument to the 'bag_fn'. */ static skBag_err_t _bagReadProcess( skstream_t *stream_in, skBag_header_t *bag, skBag_err_t (*bag_fn)(skBag_header_t *bag, const skBag_key_t *key, const skBag_counter_t *counter)) { int f_swap_byte_order; fileVersion_t bag_version; skBag_err_t err; skBag_key_t key; uint32_t counter_32; skBag_counter_t counter; ssize_t recs; /* read header */ err = _bagReadHeader(stream_in, &bag_version, &f_swap_byte_order); if (err != SKBAG_OK) { return err; } /* read key/counter pairs */ while ((recs = skStreamRead(stream_in, &key, sizeof(skBag_key_t))) > 0) { if (f_swap_byte_order) { key = BSWAP32(key); } switch (bag_version) { case 1: /* version 1 had 32 bit counters */ recs = skStreamRead(stream_in, &counter_32, sizeof(uint32_t)); if (f_swap_byte_order) { counter = (skBag_counter_t)BSWAP32(counter_32); } else { counter = (skBag_counter_t)counter_32; } break; case 2: case 3: /* versions 2 and 3 are identical and have 64 bit * counters; v3 supports compression on write */ recs = skStreamRead(stream_in, &counter, sizeof(skBag_counter_t)); if (f_swap_byte_order) { counter = BSWAP64(counter); } break; case 0: default: assert(0); abort(); } if (recs <= 0) { skAppPrintErr("Could not read counter for key"); if (recs == -1) { skStreamPrintLastErr(stream_in, recs, &skAppPrintErr); } return SKBAG_ERR_INPUT; } err = bag_fn(bag, &key, &counter); if (err != SKBAG_OK) { skAppPrintErr("Error %u processing key-counter pair", err); return err; } } if (recs == -1) { skStreamPrintLastErr(stream_in, recs, &skAppPrintErr); return SKBAG_ERR_INPUT; } return SKBAG_OK; } /* * status = _bagReadHeader(stream, &bag_version, &swap_flag); * * Read the generic header for the SiLK bag from 'stream'. Set * '*bag_version' to the version of the bag. Set '*swap_flag' to 1 * if the key/counter pairs in the bag must be byte swapped on * read, or 0 otherwise. */ static skBag_err_t _bagReadHeader( skstream_t *stream_in, fileVersion_t *bag_version, int *swap_flag) { genericHeader file_header; int rv; assert(stream_in); assert(bag_version); assert(swap_flag); rv = skStreamReadSilkHeader(stream_in, &file_header, sizeof(file_header)); if (rv) { skStreamPrintLastErr(stream_in, rv, &skAppPrintErr); return SKBAG_ERR_INPUT; } if (FT_RWBAG != skStreamGetSilkFormat(stream_in)) { skAppPrintErr("Input file %s is not a SiLK Bag file", skStreamGetPathname(stream_in)); rv = SKBAG_ERR_INPUT; } *bag_version = skStreamGetSilkVersion(stream_in); if (*bag_version == 0 || *bag_version > RWBAG_FILE_VERSION) { skAppPrintErr("Bag file version %d not supported", file_header.version); return SKBAG_ERR_INPUT; } if ((*bag_version < 2) && (SK_COMPMETHOD_NONE != skStreamGetCompressionMethod(stream_in))) { skAppPrintErr("Bag files prior to v2 do not support compression"); return SKBAG_ERR_INPUT; } *swap_flag = !skStreamIsNativeByteOrder(stream_in); return SKBAG_OK; } /* * _bagNodesFree(bag); * * Given the bag 'bag', free all the nodes that make up the bag's * tree by iterating through the nodes and leaves. */ static void _bagNodesFree(skBag_header_t *bag) { skBag_node_t *working_node[MAX_LEVELS]; uint32_t working_index[MAX_LEVELS]; uint32_t working_max[MAX_LEVELS]; skBag_level_t current_level; skBag_node_t *current_node; uint32_t current_index; uint32_t current_max; skBag_level_t leaf_level; assert(bag != NULL); leaf_level = bag->levels - 2; current_level = 0; current_node = bag->root.child; current_index = 0; current_max = SKBAG_GET_LEVEL_BLOCKS(bag, current_level); if (current_node == NULL) { return; } while (1) { if (current_level >= leaf_level) { /* we are in a node pointing to leaves */ for (; current_index < current_max; ++current_index) { if (current_node[current_index].leaf) { free(current_node[current_index].leaf); } } } else { /* we are in a node pointing to more nodes */ /* Find the next non-empty node */ while ((current_index < current_max) && (current_node[current_index].child == NULL)) { current_index++; } /* Push information about the current level's state, and move to the next level */ if (current_index < current_max) { working_index[current_level] = current_index; working_node[current_level] = current_node; working_max[current_level] = current_max; current_level++; current_node = current_node[current_index].child; current_index = 0; current_max = SKBAG_GET_LEVEL_BLOCKS(bag, current_level); } } if (current_index >= current_max) { /* Free the node we were working on */ free(current_node); /* Are we finished? */ if (current_level == 0) { break; } /* Pop the last level */ current_level--; current_node = working_node[current_level]; current_index = working_index[current_level] + 1; current_max = working_max[current_level]; } } /* while (1) */ } /* * counter = _bagTraverseSubtree(iter, subtree, lvl, at_start); * * Walk across the subtree at 'subtree', whose level in the bag * is 'lvl', and find the next non-zero counter. If the iterator * has f_skip_counter set to true, skip the first counter---whether * empty or not. If skip_counter is false, then start at the first * counter. * * 'at_start' is a boolean value. If true, start offset from the * apropriate entry in offset_path. Also, when making recursive * calls, only set to true once; that way, the leftmost path of the * tree uses offsets, but once beyond that, search whole blocks. * * Returns a pointer to the next counter found, or NULL if no * counter was found. * * Also, sets iter->offset_path to point to the counter found, * and f_skip_counter if a counter is found, so that it doesn't * re-find that counter on the next call. */ static skBag_counter_t *_bagTraverseSubtree( skBag_iterator_t *iter, const skBag_node_t *subtree, skBag_level_t lvl, int f_use_iterator_start) { skBag_blocksize_t block_cur; skBag_blocksize_t block_size; skBag_counter_t *counter = NULL; /* check input */ assert(iter != NULL); assert(iter->bag != NULL); assert(iter->offset_path != NULL); assert(subtree != NULL); assert(lvl < iter->bag->levels); if (subtree->child == NULL) { return NULL; } if (f_use_iterator_start) { block_cur = iter->offset_path[lvl]; } else { block_cur = (skBag_blocksize_t) 0; } block_size = SKBAG_GET_LEVEL_BLOCKS(iter->bag, lvl); /* * if we are at (or past!) the last node in the leaf, the counter * will not be found in this subtree */ if (block_cur >= block_size) { return NULL; } if (lvl < iter->bag->levels - 1) { /* * if we are not at the bottom level of nodes, for each * non-null child pointer, recurse one level deeper. */ for (; block_cur < block_size; ++block_cur) { if (subtree->child[block_cur].child != NULL) { /* * if the counter is NULL, there is no subtree from * this offset. */ counter = _bagTraverseSubtree(iter, &subtree->child[block_cur], lvl + 1, f_use_iterator_start); f_use_iterator_start = 0; } /* * at this point, counter points to a found counter, or * still is NULL. if it points to a valid counter, set * the iterator path for the current level, and return to * the next shallow level. */ if (counter != NULL) { iter->offset_path[lvl] = block_cur; return counter; } } } else { /* at the leaf level, so check for non-zero counters */ /* * if we follow the path straight down, we will re-find the * same counter we found on the last iteration. to avoid * that, we skip the first counter on that straight path. */ if (iter->f_skip_counter == 1) { ++block_cur; iter->f_skip_counter = 0; } for (; block_cur < block_size; ++block_cur) { if ( !SKBAG_COUNTER_IS_NULL(&subtree->leaf[block_cur])) { /* we found a counter. */ iter->offset_path[lvl] = block_cur; if (lvl == iter->bag->levels - 1) { iter->f_skip_counter = 1; } return &subtree->leaf[block_cur]; } } } /* * no counter found. if we are at the most shallow level, reset * the iterator (so subsequent calls will re-start at the * beginning of the tree.) */ if (lvl == 0) { skBag_resetIterator(iter); } return NULL; } /* * _bagUpdateContentStats(bag, key, counter); * * Update the max_counter, min_key, max_key statistics for the tree * 'bag' where the counter for the key 'key' was just set to * '*counter'. * * Note that statistics bounds only tighten; there is no way to * loosen the value if a counter is set outside the bounds. since * statistics are generated every time the bagtree is created in * memory, serializing and deserializing will correct the bounds. */ static void _bagUpdateContentStats( skBag_header_t *bag, const skBag_key_t *key, const skBag_counter_t *counter) { assert(bag != NULL); assert(bag->stats != NULL); if (*counter > bag->stats->max_counter) { bag->stats->max_counter = *counter; } if (*key > bag->stats->max_key) { bag->stats->max_key = *key; } if (*key < bag->stats->min_key) { bag->stats->min_key = *key; } }