/* set.c -- Set routines for Khepera * Created: Wed Nov 9 13:31:24 1994 by faith@dict.org * Revised: Sat Mar 30 12:04:42 2002 by faith@dict.org * Copyright 1994-1997, 2002 Rickard E. Faith (faith@dict.org) * * This library is free software; you can redistribute it and/or modify it * under the terms of the GNU Library General Public License as published * by the Free Software Foundation; either version 2 of the License, or (at * your option) any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * $Id: set.c,v 1.17 2002/08/02 19:43:15 faith Exp $ * * \section{Set Routines} * * \intro The set implementation is similar to the hash table * implementation, except that the set object does not associate a |datum| * with a |key|. For sets, keys are called elements. All of the hash * table functions are supported, with the addition of a membership test * and several set operations. * * The underlying data structure is a hash table of prime length, with * self-organizing linked lists \cite[pp.~398--9]{faith:Knuth73c} used for * collision resolution. The hash table automatically grows as necessary * to preserve efficient access. * */ #include "maaP.h" typedef struct bucket { const void *elem; unsigned int hash; struct bucket *next; } *bucketType; typedef struct set { #if MAA_MAGIC int magic; #endif unsigned long prime; unsigned long entries; bucketType *buckets; unsigned long resizings; unsigned long retrievals; unsigned long hits; unsigned long misses; unsigned long (*hash)( const void * ); int (*compare)( const void *, const void * ); int readonly; } *setType; static void _set_check( setType t, const char *function ) { if (!t) err_internal( function, "set is null\n" ); #if MAA_MAGIC if (t->magic != SET_MAGIC) err_internal( function, "Bad magic: 0x%08x (should be 0x%08x)\n", t->magic, SET_MAGIC ); #endif } static set_Set _set_create( unsigned long seed, set_HashFunction hash, set_CompareFunction compare ) { setType t; unsigned long i; unsigned long prime = prm_next_prime( seed ); t = xmalloc( sizeof( struct set ) ); #if MAA_MAGIC t->magic = SET_MAGIC; #endif t->prime = prime; t->entries = 0; t->buckets = xmalloc( t->prime * sizeof( struct bucket ) ); t->resizings = 0; t->retrievals = 0; t->hits = 0; t->misses = 0; t->hash = hash ? hash : hsh_string_hash; t->compare = compare ? compare : hsh_string_compare; t->readonly = 0; for (i = 0; i < t->prime; i++) t->buckets[i] = NULL; return t; } /* \doc The |set_create| function initilizes a set object. Elements are pointers to "void". The internal representation of the set will grow automatically when an insertion is performed and the table is more than half full. The |hash| function should take a pointer to a |elem| and return an "unsigned int". If |hash| is "NULL", then the |elem| is assumed to be a pointer to a null-terminated string, and the function shown in \grindref{fig:hshstringhash} will be used for |hash|. The |compare| function should take a pair of pointers to elements and return zero if the elements are the same and non-zero if they are different. If |compare| is "NULL", then the elements are assumed to point to null-terminated strings, and the |strcmp| function will be used for |compare|. Additionally, the |hsh_pointer_hash| and |hsh_pointer_compare| functions are available and can be used to treat the \emph{value} of the "void" pointer as the element. These functions are often useful for maintaining sets of objects. */ set_Set set_create( set_HashFunction hash, set_CompareFunction compare ) { return _set_create( 0, hash, compare ); } set_HashFunction set_get_hash( set_Set set ) { setType t = (setType)set; _set_check( t, __FUNCTION__ ); return t->hash; } set_CompareFunction set_get_compare( set_Set set ) { setType t = (setType)set; _set_check( t, __FUNCTION__ ); return t->compare; } static void _set_destroy_buckets( set_Set set ) { unsigned long i; setType t = (setType)set; _set_check( t, __FUNCTION__ ); for (i = 0; i < t->prime; i++) { bucketType b = t->buckets[i]; while (b) { bucketType next = b->next; xfree( b ); /* terminal */ b = next; } } xfree( t->buckets ); /* terminal */ t->buckets = NULL; } static void _set_destroy_table( set_Set set ) { setType t = (setType)set; _set_check( t, __FUNCTION__ ); #if MAA_MAGIC t->magic = SET_MAGIC_FREED; #endif xfree( t ); /* terminal */ } /* \doc |set_destroy| frees all of the memory associated with the set object. The memory used by elements is \emph{not} freed---this memory is the responsibility of the user. However, a call to |set_iterate| can be used to free this memory \emph{immediately} before a call to |set_destroy|. */ void set_destroy( set_Set set ) { setType t = (setType)set; _set_check( t, __FUNCTION__ ); if (t->readonly) err_internal( __FUNCTION__, "Attempt to destroy readonly set\n" ); _set_destroy_buckets( set ); _set_destroy_table( set ); } static void _set_insert( set_Set set, unsigned int hash, const void *elem ) { setType t = (setType)set; unsigned long h = hash % t->prime; bucketType b; _set_check( t, __FUNCTION__ ); b = xmalloc( sizeof( struct bucket ) ); b->hash = hash; b->elem = elem; b->next = NULL; if (t->buckets[h]) b->next = t->buckets[h]; t->buckets[h] = b; ++t->entries; } /* \doc |set_insert| inserts a new |elem| into the |set|. If the insertion is successful, zero is returned. If the |elem| already exists, 1 is returned. If the internal representation of the set becomes more than half full, its size is increased automatically. At present, this requires that all of the element pointers are copied into a new set. Rehashing is not required, however, since the hash values are stored for each element. */ int set_insert( set_Set set, const void *elem ) { setType t = (setType)set; unsigned long hashValue = t->hash( elem ); unsigned long h; _set_check( t, __FUNCTION__ ); if (t->readonly) err_internal( __FUNCTION__, "Attempt to insert into readonly set\n" ); /* Keep table less than half full */ if (t->entries * 2 > t->prime) { setType new = _set_create( t->prime * 3, t->hash, t->compare ); unsigned long i; for (i = 0; i < t->prime; i++) { if (t->buckets[i]) { bucketType pt; for (pt = t->buckets[i]; pt; pt = pt->next) _set_insert( new, pt->hash, pt->elem ); } } /* fixup values */ _set_destroy_buckets( t ); t->prime = new->prime; t->buckets = new->buckets; _set_destroy_table( new ); ++t->resizings; } h = hashValue % t->prime; if (t->buckets[h]) { /* Assert uniqueness */ bucketType pt; for (pt = t->buckets[h]; pt; pt = pt->next) if (!t->compare( pt->elem, elem )) return 1; } _set_insert( t, hashValue, elem ); return 0; } /* \doc |set_delete| removes an |elem| from the |set|. Zero is returned if the |elem| was present. Otherwise, 1 is returned. */ int set_delete( set_Set set, const void *elem ) { setType t = (setType)set; unsigned long h = t->hash( elem ) % t->prime; _set_check( t, __FUNCTION__ ); if (t->readonly) err_internal( __FUNCTION__, "Attempt to delete from readonly set\n" ); if (t->buckets[h]) { bucketType pt; bucketType prev; for (prev = NULL, pt = t->buckets[h]; pt; prev = pt, pt = pt->next) if (!t->compare( pt->elem, elem )) { --t->entries; if (!prev) t->buckets[h] = pt->next; else prev->next = pt->next; xfree( pt ); return 0; } } return 1; } /* \doc |set_member| returns 1 if |elem| is in |set|. Otherwise, zero is returned. */ int set_member( set_Set set, const void *elem ) { setType t = (setType)set; unsigned long h = t->hash( elem ) % t->prime; _set_check( t, __FUNCTION__ ); ++t->retrievals; if (t->buckets[h]) { bucketType pt; bucketType prev; for (prev = NULL, pt = t->buckets[h]; pt; prev = pt, pt = pt->next) if (!t->compare( pt->elem, elem )) { if (!prev) { ++t->hits; } else if (!t->readonly) { /* Self organize */ prev->next = pt->next; pt->next = t->buckets[h]; t->buckets[h] = pt; } return 1; } } ++t->misses; return 0; } /* \doc |set_iterate| is used to iterate a function over every |elem| in the |set|. The function, |iterator|, is passed each |elem|. If |iterator| returns a non-zero value, the iterations stop, and |set_iterate| returns. Note that the elements are in some arbitrary order, and that this order may change between two successive calls to |set_iterate|. */ int set_iterate( set_Set set, int (*iterator)( const void *elem ) ) { setType t = (setType)set; unsigned long i; int savedReadonly; _set_check( t, __FUNCTION__ ); savedReadonly = t->readonly; t->readonly = 1; for (i = 0; i < t->prime; i++) { if (t->buckets[i]) { bucketType pt; for (pt = t->buckets[i]; pt; pt = pt->next) if (iterator( pt->elem )) { t->readonly = savedReadonly; return 1; } } } t->readonly = savedReadonly; return 0; } /* \doc |set_iterate_arg| is used to iterate a function over every |elem| in the |set|. The function, |iterator|, is passed each |elem|. If |iterator| returns a non-zero value, the iterations stop, and |set_iterate| returns. Note that the elements are in some arbitrary order, and that this order may change between two successive calls to |set_iterate|. */ int set_iterate_arg( set_Set set, int (*iterator)( const void *elem, void *arg ), void *arg ) { setType t = (setType)set; unsigned long i; int savedReadonly; _set_check( t, __FUNCTION__ ); savedReadonly = t->readonly; t->readonly = 1; for (i = 0; i < t->prime; i++) { if (t->buckets[i]) { bucketType pt; for (pt = t->buckets[i]; pt; pt = pt->next) if (iterator( pt->elem, arg )) { t->readonly = savedReadonly; return 1; } } } t->readonly = savedReadonly; return 0; } /* \doc |set_init_position| returns a position marker for some arbitary first element in the set. This marker can be used with |set_next_position| and |set_get_position|. */ set_Position set_init_position( set_Set set ) { setType t = (setType)set; unsigned long i; _set_check( t, __FUNCTION__ ); for (i = 0; i < t->prime; i++) if (t->buckets[i]) { t->readonly = 1; return t->buckets[i]; } return NULL; } /* \doc |set_next_position| returns a position marker for the next element in the set. Elements are in arbitrary order based on their positions in the hash table. */ set_Position set_next_position( set_Set set, set_Position position ) { setType t = (setType)set; bucketType b = (bucketType)position; unsigned long i; unsigned long h; _set_check( t, __FUNCTION__ ); if (!b) { t->readonly = 0; return NULL; } if (b->next) return b->next; for (h = b->hash % t->prime, i = h + 1; i < t->prime; i++) if (t->buckets[i]) return t->buckets[i]; t->readonly = 0; return NULL; } /* \doc |set_get_position| returns the element associated with the |position| marker, or "NULL" if there is no such element. */ void *set_get_position( set_Position position ) { bucketType b = (bucketType)position; if (!b) return NULL; return (void *)b->elem; /* Discard const */ } /* \doc |set_readonly| sets the |readonly| flag for the |set| to |flag|. |flag| should be 0 or 1. The value of the previous flag is returned. When a set is marked as readonly, self-organization of the bucket-overflow lists will not take place, and any attempt to modify the list (e.g., insertion or deletion) will result in an error. */ int set_readonly( set_Set set, int flag ) { setType t = (setType)set; int current; _set_check( t, __FUNCTION__ ); current = t->readonly; t->readonly = flag; return current; } /* \doc |set_add| returns |set1|, which now contains all of the elements in |set1| and |set2|. Only pointers to elements are copied, \emph{not} the data pointed (this has memory management implications). The |hash| and |compare| functions must be identical for the two sets. |set2| is not changed. */ set_Set set_add( set_Set set1, set_Set set2 ) { setType t1 = (setType)set1; setType t2 = (setType)set2; unsigned long i; _set_check( t1, __FUNCTION__ ); _set_check( t2, __FUNCTION__ ); if (t1->hash != t2->hash) err_fatal( __FUNCTION__, "Sets do not have identical hash functions\n" ); if ( t1->compare != t2->compare ) err_fatal( __FUNCTION__, "Sets do not have identical comparison functions\n" ); for (i = 0; i < t2->prime; i++) { if (t2->buckets[i]) { bucketType pt; for (pt = t2->buckets[i]; pt; pt = pt->next) set_insert( set1, pt->elem ); } } return set1; } /* \doc |set_del| returns |set1|, which now contains all of the elements in |set1| other than those in |set2|. Only pointers to elements are copied, \emph{not} the data pointed (this has memory management implications). The |hash| and |compare| functions must be identical for the two sets. |set2| is not changed. */ set_Set set_del( set_Set set1, set_Set set2 ) { setType t1 = (setType)set1; setType t2 = (setType)set2; unsigned long i; _set_check( t1, __FUNCTION__ ); _set_check( t2, __FUNCTION__ ); if (t1->hash != t2->hash) err_fatal( __FUNCTION__, "Sets do not have identical hash functions\n" ); if ( t1->compare != t2->compare ) err_fatal( __FUNCTION__, "Sets do not have identical comparison functions\n" ); for (i = 0; i < t2->prime; i++) { if (t2->buckets[i]) { bucketType pt; for (pt = t2->buckets[i]; pt; pt = pt->next) set_delete( set1, pt->elem ); } } return set1; } /* \doc |set_union| returns a new set which is the union of |set1| and |set2|. Only pointers to elements are copied, \emph{not} the data pointed (this has memory management implications). The |hash| and |compare| functions must be identical for the two sets. */ set_Set set_union( set_Set set1, set_Set set2 ) { setType t1 = (setType)set1; setType t2 = (setType)set2; set_Set set; unsigned long i; _set_check( t1, __FUNCTION__ ); _set_check( t2, __FUNCTION__ ); if (t1->hash != t2->hash) err_fatal( __FUNCTION__, "Sets do not have identical hash functions\n" ); if ( t1->compare != t2->compare ) err_fatal( __FUNCTION__, "Sets do not have identical comparison functions\n" ); set = set_create( t1->hash, t1->compare ); for (i = 0; i < t1->prime; i++) { if (t1->buckets[i]) { bucketType pt; for (pt = t1->buckets[i]; pt; pt = pt->next) set_insert( set, pt->elem ); } } for (i = 0; i < t2->prime; i++) { if (t2->buckets[i]) { bucketType pt; for (pt = t2->buckets[i]; pt; pt = pt->next) set_insert( set, pt->elem ); } } return set; } /* \doc |set_inter| returns a new set which is the intersection of |set1| and |set2|. Only pointers to elements are copied, \emph{not} the data pointed (this has memory management implications). The |hash| and |compare| functions must be identical for the two sets. */ set_Set set_inter( set_Set set1, set_Set set2 ) { setType t1 = (setType)set1; setType t2 = (setType)set2; set_Set set; unsigned long i; int savedReadonly; _set_check( t1, __FUNCTION__ ); _set_check( t2, __FUNCTION__ ); if (t1->hash != t2->hash) err_fatal( __FUNCTION__, "Sets do not have identical hash functions\n" ); if ( t1->compare != t2->compare ) err_fatal( __FUNCTION__, "Sets do not have identical comparison functions\n" ); set = set_create( t1->hash, t1->compare ); savedReadonly = t2->readonly; t2->readonly = 1; /* save time on set_member */ for (i = 0; i < t1->prime; i++) { if (t1->buckets[i]) { bucketType pt; for (pt = t1->buckets[i]; pt; pt = pt->next) if (set_member( t2, pt->elem )) set_insert( set, pt->elem ); } } t2->readonly = savedReadonly; return set; } /* \doc |set_diff| returns a new set which is the difference resulting from removing every element in |set2| from the elements in |set1|. Only pointers to elements are copied, \emph{not} the data pointed (this has memory management implications). The |hash| and |compare| functions must be identical for the two sets. */ set_Set set_diff( set_Set set1, set_Set set2 ) { setType t1 = (setType)set1; setType t2 = (setType)set2; set_Set set; unsigned long i; int savedReadonly; _set_check( t1, __FUNCTION__ ); _set_check( t2, __FUNCTION__ ); if (t1->hash != t2->hash) err_fatal( __FUNCTION__, "Sets do not have identical hash functions\n" ); if ( t1->compare != t2->compare ) err_fatal( __FUNCTION__, "Sets do not have identical comparison functions\n" ); set = set_create( t1->hash, t1->compare ); savedReadonly = t2->readonly; t2->readonly = 1; /* save time on set_member */ for (i = 0; i < t1->prime; i++) { if (t1->buckets[i]) { bucketType pt; for (pt = t1->buckets[i]; pt; pt = pt->next) if (!set_member( t2, pt->elem )) set_insert( set, pt->elem ); } } t2->readonly = savedReadonly; return set; } /* \doc |set_equal| returns non-zero if |set1| and |set2| contain the same number of elements, and all of the elements in |set1| are also in |set2|. The |hash| and |compare| functions must be identical for the two sets. */ int set_equal( set_Set set1, set_Set set2 ) { setType t1 = (setType)set1; setType t2 = (setType)set2; unsigned long i; int savedReadonly; _set_check( t1, __FUNCTION__ ); _set_check( t2, __FUNCTION__ ); if (t1->hash != t2->hash) err_fatal( __FUNCTION__, "Sets do not have identical hash functions\n" ); if ( t1->compare != t2->compare ) err_fatal( __FUNCTION__, "Sets do not have identical comparison functions\n" ); if (t1->entries != t2->entries) return 0; /* not equal */ savedReadonly = t2->readonly; t2->readonly = 1; /* save time on set_member */ for (i = 0; i < t1->prime; i++) { if (t1->buckets[i]) { bucketType pt; for (pt = t1->buckets[i]; pt; pt = pt->next) if (!set_member( t2, pt->elem )) { t2->readonly = savedReadonly; return 0; /* not equal */ } } } t2->readonly = savedReadonly; return 1; /* equal */ } int set_count( set_Set set ) { setType t = (setType)set; _set_check( t, __FUNCTION__ ); return t->entries; } /* \doc |set_get_stats| returns statistics about the |set|. The |set_Stats| structure is shown in \grind{set_Stats}. */ set_Stats set_get_stats( set_Set set ) { setType t = (setType)set; set_Stats s = xmalloc( sizeof( struct set_Stats ) ); unsigned long i; unsigned long count; _set_check( t, __FUNCTION__ ); s->size = t->prime; s->resizings = t->resizings; s->entries = 0; s->buckets_used = 0; s->singletons = 0; s->maximum_length = 0; s->retrievals = t->retrievals; s->hits = t->hits; s->misses = t->misses; for (i = 0; i < t->prime; i++) { if (t->buckets[i]) { bucketType pt; ++s->buckets_used; for (count = 0, pt = t->buckets[i]; pt; ++count, pt = pt->next); if (count == 1) ++s->singletons; s->maximum_length = max( s->maximum_length, count ); s->entries += count; } } if ( t->entries != s->entries ) err_internal( __FUNCTION__, "Incorrect count for entries: %lu vs. %lu\n", t->entries, s->entries ); return s; } /* \doc |set_print_stats| prints the statistics for |set| on the specified |stream|. If |stream| is "NULL", then "stdout" will be used. */ void set_print_stats( set_Set set, FILE *stream ) { setType t = (setType)set; FILE *str = stream ? stream : stdout; set_Stats s = set_get_stats( t ); _set_check( t, __FUNCTION__ ); fprintf( str, "Statistics for set at %p:\n", set ); fprintf( str, " %lu resizings to %lu total\n", s->resizings, s->size ); fprintf( str, " %lu entries (%lu buckets used, %lu without overflow)\n", s->entries, s->buckets_used, s->singletons ); fprintf( str, " maximum list length is %lu", s->maximum_length ); if (s->buckets_used) fprintf( str, " (optimal is %.1f)\n", (double)s->entries / (double)s->buckets_used ); else fprintf( str, "\n" ); fprintf( str, " %lu retrievals (%lu from top, %lu failed)\n", s->retrievals, s->hits, s->misses ); xfree( s ); /* rare */ }