/*****************************************************************************/ /*! *\file hash_table.h *\brief hash table implementation * * Author: Alexander Fuchs * * Created: Thu Oct 19 11:04:00 2006 * *
* * License to use, copy, modify, sell and/or distribute this software * and its documentation for any purpose is hereby granted without * royalty, subject to the terms and conditions defined in the \ref * LICENSE file provided with this distribution. * *
*/ /*****************************************************************************/ /* * Copyright (c) 1996,1997 * Silicon Graphics Computer Systems, Inc. * * Permission to use, copy, modify, distribute and sell this software * and its documentation for any purpose is hereby granted without fee, * provided that the above copyright notice appear in all copies and * that both that copyright notice and this permission notice appear * in supporting documentation. Silicon Graphics makes no * representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied warranty. * * * Copyright (c) 1994 * Hewlett-Packard Company * * Permission to use, copy, modify, distribute and sell this software * and its documentation for any purpose is hereby granted without fee, * provided that the above copyright notice appear in all copies and * that both that copyright notice and this permission notice appear * in supporting documentation. Hewlett-Packard Company makes no * representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied warranty. * */ // this implementation is in essence a subset of the SGI implementation: // http://www.sgi.com/tech/stl/stl_hashtable.h #ifndef _cvc3__hash__hash_table_h_ #define _cvc3__hash__hash_table_h_ #include #include #include #include #include "hash_fun.h" // For some reason, including debug.h doesn't work--so redeclare macros here #ifdef DEBUG #define DebugAssert(cond, str) if(!(cond)) \ CVC3::debugError(__FILE__, __LINE__, #cond, str) namespace CVC3 { extern void debugError(const std::string& file, int line, const std::string& cond, const std::string& msg); } #else #define DebugAssert(cond, str) #endif namespace Hash { // get size_t from hash_fun, which gets it from cstddef typedef size_t size_type; /// primes for increasing the hash table size // Note: assumes size_type is unsigned and at least 32 bits. const size_type num_primes = 28; static const size_type prime_list[num_primes] = { 53ul, 97ul, 193ul, 389ul, 769ul, 1543ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul }; inline size_type next_prime(size_type n) { const size_type* first = prime_list; const size_type* last = prime_list + (size_type)num_primes; const size_type* pos = std::lower_bound(first, last, n); return pos == last ? *(last - 1) : *pos; } /*! template to instante to hash map and hash set based on the sgi implementation: http://www.sgi.com/tech/stl/HashedAssociativeContainer.html _Key: hash key type _Data: key + value data to store _HashFcn: functional class providing a hash function: int(_Key) Note: in some STL implementations hash is already part of some extension an in namespace std or stdext, in some it is not. So we assume that it is not available. :TODO: _EqualKey: functional class providing a comparison function: bool(_Key, _Key) returns true iff two keys are considered to be equal _ExtractKey: extracts key from _Data: _Key(_Data) */ // // can't use debug.h as debug.h uses hash maps... // // implemented as an array of lists (buckets) template class hash_table { /// types public: // interface typedefs typedef Hash::size_type size_type; typedef _Key key_type; typedef _Value value_type; typedef _HashFcn hasher; typedef _EqualKey key_equal; protected: // a bucket is a list of values // using an STL list makes it more complicated to have a nice end iterator, // as iterators of different lists can not be compared // (at least in the MS STL). // so instead we implement our own single-linked list here, // where NULL is the end iterator for all lists. struct BucketNode { BucketNode(BucketNode* next, const value_type& value) : d_next(next), d_value(value) { }; BucketNode* d_next; value_type d_value; }; typedef BucketNode Bucket; // the buckets are kept in an array typedef std::vector Data; typedef typename Data::iterator data_iter; typedef typename Data::const_iterator data_const_iter; public: // iterators class iterator; friend class iterator; class const_iterator; friend class const_iterator; /// variables protected: /// template parameters // the hash function for a key hasher d_hash; // the equality function between keys key_equal d_equal; // extraction of key from data _ExtractKey d_extractKey; // the current number of elements - stored for efficiency size_type d_size; // the hash table - an array of buckets Data d_data; /// methods protected: /// template parameters // the hash function for a key size_type hash(const key_type& key) const { return d_hash(key); } // equality between keys bool equal(const key_type& key1, const key_type& key2) const { return d_equal(key1, key2); } // extraction of a key const key_type& extractKey(const value_type& value) const { return d_extractKey(value); } /// bucket retrieval // returns the index in the array which contains the bucket // with the keys mapping to the same hash value as key size_type getBucketIndex(const key_type& key) const { return (hash(key) % d_data.size()); } Bucket* getBucketByKey(const key_type& key) { return getBucketByIndex(getBucketIndex(key)); } const Bucket* getBucketByKey(const key_type& key) const { return getBucketByIndex(getBucketIndex(key)); } Bucket* getBucketByIndex(const size_type index) { DebugAssert(index < d_data.size(),"hash_table::getBucketByIndex"); return d_data.at(index); } const Bucket* getBucketByIndex(const size_type index) const { DebugAssert(index < d_data.size(),"hash_table::getBucketByIndex (const)"); return d_data.at(index); } /// resize // increase the size of the table, typically if the load factor is too high void resize() { if (load_factor() > 1) { // create new table with doubled size size //size_type size = 2 * d_data.size(); // this is simple, but might not be efficient for bad hash functions, // which for example mainly hash to values which contain 2 as a factor. // thus, instead we go from a prime to a prime of more or less double size. size_type size = next_prime(d_data.size() + 1); Data copy(size, NULL); // move entries to new table for (size_type i = 0; i < d_data.size(); ++i) { // head of current bucket to move BucketNode* bucket = d_data[i]; while (bucket != NULL) { // move head of old bucket BucketNode* current = bucket; bucket = bucket->d_next; // move old head to new bucket size_type new_index = hash(extractKey(current->d_value)) % size; BucketNode* new_bucket = copy[new_index]; current->d_next = new_bucket; copy[new_index] = current; } d_data[i] = NULL; } d_data.swap(copy); } } public: /// constructors // default size is 16 buckets hash_table() : d_hash(_HashFcn()), d_equal(_EqualKey()), d_extractKey(_ExtractKey()), d_size(0), d_data(16) { init(); }; // specifiy initial number of buckets - must be positive hash_table(size_type initial_capacity) : d_hash(_HashFcn()), d_equal(_EqualKey()), d_extractKey(_ExtractKey()), d_size(0), d_data(initial_capacity) { init(); }; // specifiy initial number of buckets and hash function hash_table(size_type initial_capacity, const _HashFcn& hash) : d_hash(hash), d_equal(_EqualKey()), d_extractKey(_ExtractKey()), d_size(0), d_data(initial_capacity) { init(); }; // specifiy initial number of buckets, hash and equal function hash_table(size_type initial_capacity, const _HashFcn& hash, const _EqualKey& equal) : d_hash(hash), d_equal(equal), d_extractKey(_ExtractKey()), d_size(0), d_data(initial_capacity) { init(); }; // copy hash table hash_table(const hash_table& other) : d_hash(other.d_hash), d_equal(other.d_equal), d_extractKey(other.d_extractKey), d_size(other.d_size), d_data(0) { assignTable(other.d_data); }; ~hash_table() { clear(); } // assign hash table hash_table& operator=(const hash_table& other) { if (this != &other) { clear(); d_hash = other.d_hash; d_equal = other.d_equal; d_extractKey = other.d_extractKey; d_size = other.d_size; assignTable(other.d_data); } return *this; } // replaces the current hash table with the given one void assignTable(const Data& data) { // copy elements: // default assignment operator does not work if value_type contains // constants, which should be the case as the key should be constant. // so not even shrinking a vector is possible, // so create a new table instead and swap with the current one. Data tmp(data.size()); d_data.swap(tmp); // for each bucket ... for (size_type i = 0; i < data.size(); ++i) { // .. copy each element DebugAssert(i < d_data.size(),"hash_table::operator="); // copy bucket if not empty Bucket* source = data[i]; if (source != NULL) { // set first element Bucket* target = new BucketNode(NULL, source->d_value); d_data[i] = target; source = source->d_next; // copy remaining nodes while (source != NULL) { target->d_next = new BucketNode(NULL, source->d_value); target = target->d_next; source = source->d_next; } } } } void swap(hash_table& other) { std::swap(d_hash, other.d_hash); std::swap(d_equal, other.d_equal); std::swap(d_extractKey, other.d_extractKey); std::swap(d_size, other.d_size); d_data.swap(other.d_data); } // sets all buckets to NULL void init() { for (size_type i = 0; i < d_data.size(); ++i) { d_data[i] = NULL; } } // empty all buckets void clear() { d_size = 0; for (size_type i = 0; i < d_data.size(); ++i) { BucketNode* head = d_data[i]; while (head != NULL) { BucketNode* next = head->d_next; delete head; head = next; } d_data[i] = NULL; } } /// operations // returns end iterator if key was not bound iterator find(const key_type& key) { for (BucketNode* node = getBucketByKey(key); node != NULL; node = node->d_next) { if (equal(extractKey(node->d_value), key)) { return iterator(this, node); } } return end(); } // const version of find const_iterator find(const key_type& key) const { for (const BucketNode* node = getBucketByKey(key); node != NULL; node = node->d_next) { if (equal(extractKey(node->d_value), key)) { return const_iterator(this, node); } } return end(); } // adds the mapping from key to data, if key is still unbound // otherwise returns false std::pair insert(const value_type& value) { // resize in case we insert resize(); const key_type& key = extractKey(value); size_type index = getBucketIndex(key); // check if key is already bound for (BucketNode* node = d_data[index]; node != NULL; node = node->d_next) { if (equal(extractKey(node->d_value), key)) { return std::make_pair(end(), false); } } // insert new value ++d_size; d_data[index] = new BucketNode(d_data[index], value); return std::make_pair(iterator(this, d_data[index]), true); } // if key in value is already bound, // returns that bindings, // otherwise inserts value and returns it. value_type& find_or_insert(const value_type& value) { // resize in case we insert resize(); const key_type& key = extractKey(value); size_type index = getBucketIndex(key); // check if key is already bound for (BucketNode* node = d_data[index]; node != NULL; node = node->d_next) { if (equal(extractKey(node->d_value), key)) { return node->d_value; } } // insert new value ++d_size; d_data[index] = new BucketNode(d_data[index], value); return d_data[index]->d_value; } // removes binding of key // returns number of keys removed, // i.e. 1 if key was bound, 0 if key was not bound. size_type erase(const key_type& key) { size_type index = getBucketIndex(key); // keep track of the node previous to the current one BucketNode* prev = NULL; for (BucketNode* node = d_data[index]; node != NULL; node = node->d_next) { if (equal(extractKey(node->d_value), key)) { --d_size; // remove the bucket's head if (prev == NULL) { d_data[index] = node->d_next; } // remove within the list; else { prev->d_next = node->d_next; } delete node; return 1; } prev = node; } return 0; } /// status // is the key bound? bool contains(const key_type& key) const { return (find(key) != end()); } // returns the number of times a key is bound, // i.e. 0 or 1 size_type count(const _Key& key) const { if (contains(key)) { return 1; } else { return 0; } } // is the hash table empty? bool empty() const { return (d_size == 0); } // the number of elements in the hash table size_type size() const { return d_size; } // the number of buckets in the hash table size_type bucket_count() const { return d_data.size(); } // returns the average number of elements per bucket float load_factor() const { return (float(d_size) / float(d_data.size())); } /// iterators // returns forward iterator to iterate over all key/data pairs iterator begin() { if (d_size > 0) { // find first non-empty bucket size_type index = 0; while (d_data[index] == NULL) { ++index; } return iterator(this, d_data[index]); } else { return end(); } } // const version of begin const_iterator begin() const { if (d_size > 0) { // find first non-empty bucket size_type index = 0; while (d_data[index] == NULL) { ++index; } return const_iterator(this, d_data[index]); } else { return end(); } } // returns end iterator iterator end() { return iterator(this, NULL); } // const version of end const_iterator end() const { return const_iterator(this, NULL); } /// inner classes // iterator over hashtable elements // modifying the hash table leaves iterator intact, // unless the key in the value it points to is modified/deleted class iterator { friend class hash_table; friend class const_iterator; /// variables protected: // the hash table of this iterator hash_table* d_hash_table; // iterator points to current element in some bucket BucketNode* d_node; /// methods protected: // used by hash_table to create an iterator iterator(hash_table* hash_table, BucketNode* node) : d_hash_table(hash_table), d_node(node) { } public: // public default constructor, // leaves the iterator in undefined state. iterator() : d_hash_table(NULL), d_node(NULL) { } // copy constructor iterator(const iterator& other) : d_hash_table(other.d_hash_table), d_node(other.d_node) { } // assignment iterator& operator=(const iterator& other) { if (this != &other) { d_hash_table = other.d_hash_table; d_node = other.d_node; } return *this; } // go to next data - pre-increment iterator& operator++() { // must not be the end iterator DebugAssert(d_node != NULL, "hash operator++"); // get current bucket index size_type index = d_hash_table->getBucketIndex(d_hash_table->extractKey(d_node->d_value)); // go to next entry in bucket d_node = d_node->d_next; // while current bucket empty while (d_node == NULL) { // go to next bucket ++index; // all buckets exhausted if (index == d_hash_table->d_data.size()) { // unfortunately this does not work, as end() returns a tmp value // return d_hash_table->end(); *this = d_hash_table->end(); return *this; } DebugAssert(index < d_hash_table->d_data.size(), "hash operator++ 2"); d_node = d_hash_table->getBucketByIndex(index); } return *this; }; // go to next data - post-increment iterator operator++(int) { iterator tmp = *this; ++(*this); return tmp; } value_type& operator*() const { return d_node->d_value; } value_type* operator->() const { return &(operator*()); } // are two iterator identical? bool operator==(const iterator& other) const { // if the same bucket iterator, then it must be the same hash table DebugAssert(d_node == NULL || d_node != other.d_node || d_hash_table == other.d_hash_table, "hash operator=="); return (d_node == other.d_node); } // negation of == bool operator!=(const iterator& other) const { return !(*this == other); } }; // const iterator over hashtable elements // modifying the hash table leaves iterator intact, // unless the key in the value it points to is modified/deleted class const_iterator { friend class hash_table; /// variables protected: // the hash table of this iterator const hash_table* d_hash_table; // iterator points to current element in some bucket const BucketNode* d_node; /// methods protected: // used by hash_table to create an iterator const_iterator(hash_table const* hash_table, const BucketNode* node) : d_hash_table(hash_table), d_node(node) { } public: // public default constructor, // leaves the iterator in undefined state. const_iterator() : d_hash_table(NULL), d_node(NULL) { } // copy constructor const_iterator(const const_iterator& other) : d_hash_table(other.d_hash_table), d_node(other.d_node) { } // conversion constructor from non-const iterator const_iterator(const iterator& other) : d_hash_table(other.d_hash_table), d_node(other.d_node) { } // assignment const_iterator& operator=(const const_iterator& other) { if (this != &other) { d_hash_table = other.d_hash_table; d_node = other.d_node; } return *this; } // go to next data - pre-increment const_iterator& operator++() { // must not be the end iterator DebugAssert(d_node != NULL, ""); // get current bucket index size_type index = d_hash_table->getBucketIndex(d_hash_table->extractKey(d_node->d_value)); // go to next entry in bucket d_node = d_node->d_next; // while current bucket empty while (d_node == NULL) { // go to next bucket ++index; // all buckets exhausted if (index == d_hash_table->d_data.size()) { // unfortunately this does not work, as end() returns a tmp value // return d_hash_table->end(); *this = d_hash_table->end(); return *this; } DebugAssert(index < d_hash_table->d_data.size(),""); d_node = d_hash_table->getBucketByIndex(index); } return *this; }; // go to next data - post-increment const_iterator operator++(int) { const_iterator tmp = *this; ++(*this); return tmp; } const value_type& operator*() const { return d_node->d_value; } const value_type* operator->() const { return &(operator*()); } // are two iterator identical? bool operator==(const const_iterator& other) const { // if the same bucket iterator, then it must be the same hash table DebugAssert(d_node == NULL || d_node != other.d_node || d_hash_table == other.d_hash_table,""); return (d_node == other.d_node); } // negation of == bool operator!=(const const_iterator& other) const { return !(*this == other); } }; }; } #endif