/* tables.c -- hash tables
Copyright (C) 2000 John Harper <john@dcs.warwick.ac.uk>
$Id: tables.c,v 1.12 2001/08/02 04:50:39 jsh Exp $
This file is part of librep.
librep is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
librep is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with librep; see the file COPYING. If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
/* notes:
The api of this module (except for make-table) was mostly borrowed
from Scheme48. The implementation is all my own fault.. */
#define _GNU_SOURCE
#include "repint.h"
#include <string.h>
#ifdef NEED_MEMORY_H
# include <memory.h>
#endif
typedef unsigned rep_PTR_SIZED_INT hash_value;
typedef struct node_struct node;
struct node_struct {
node *next;
repv key, value;
hash_value hash;
};
typedef struct table_struct table;
struct table_struct {
repv car;
table *next;
int total_buckets, total_nodes;
node **buckets;
repv hash_fun;
repv compare_fun;
repv guardian; /* non-null if a weak table */
};
#define TABLEP(v) rep_CELL16_TYPEP(v, table_type)
#define TABLE(v) ((table *) rep_PTR(v))
static int table_type;
static table *all_tables;
/* ensure X is +ve and in an int */
#define TRUNC(x) (((x) << (rep_VALUE_INT_SHIFT+1)) >> (rep_VALUE_INT_SHIFT+1))
/* type hooks */
static void
table_mark (repv val)
{
int i;
for (i = 0; i < TABLE(val)->total_buckets; i++)
{
node *n;
for (n = TABLE(val)->buckets[i]; n != 0; n = n->next)
{
if (!TABLE(val)->guardian)
rep_MARKVAL(n->key);
rep_MARKVAL(n->value);
}
}
rep_MARKVAL(TABLE(val)->hash_fun);
rep_MARKVAL(TABLE(val)->compare_fun);
rep_MARKVAL(TABLE(val)->guardian);
}
static void
free_table (table *x)
{
int i;
for (i = 0; i < x->total_buckets; i++)
{
node *n, *next;
for (n = x->buckets[i]; n != 0; n = next)
{
next = n->next;
rep_free (n);
}
}
if (x->total_buckets > 0)
rep_free (x->buckets);
rep_FREE_CELL (x);
}
static void
table_sweep (void)
{
table *x = all_tables;
all_tables = 0;
while (x != 0)
{
table *next = x->next;
if (!rep_GC_CELL_MARKEDP (rep_VAL(x)))
free_table (x);
else
{
rep_GC_CLR_CELL (rep_VAL(x));
x->next = all_tables;
all_tables = x;
}
x = next;
}
}
static void
table_print (repv stream, repv arg)
{
rep_stream_puts (stream, "#<table ", -1, rep_FALSE);
rep_princ_val (stream, TABLE(arg)->hash_fun);
rep_stream_putc (stream, ' ');
rep_princ_val (stream, TABLE(arg)->compare_fun);
rep_stream_putc (stream, '>');
}
/* hash functions */
static inline hash_value
hash_string (register u_char *ptr)
{
register hash_value value = 0;
while (*ptr != 0)
value = (value * 33) + *ptr++;
return rep_MAKE_INT (TRUNC (value));
}
DEFUN("string-hash", Fstring_hash, Sstring_hash, (repv string), rep_Subr1) /*
::doc:rep.data.tables#string-hash::
string-hash STRING
Return a positive fixnum somehow related to the contents of STRING,
such that (string= X Y) implies (= (string-hash X) (string-hash Y)).
::end:: */
{
rep_DECLARE1(string, rep_STRINGP);
return hash_string (rep_STR (string));
}
DEFUN("symbol-hash", Fsymbol_hash, Ssymbol_hash, (repv sym), rep_Subr1) /*
::doc:rep.data.tables#symbol-hash::
symbol-hash SYMBOL
Return a positive fixnum somehow related to the name of SYMBOL.
::end:: */
{
rep_DECLARE1(sym, rep_SYMBOLP);
return hash_string (rep_STR (rep_SYM (sym)->name));
}
DEFUN("eq-hash", Feq_hash, Seq_hash, (repv value), rep_Subr1) /*
::doc:rep.data.tables#eq-hash::
eq-hash ARG
Return a positive fixnum somehow related to object ARG, such that (eq X
Y) implies (= (eq-hash X) (eq-hash Y)).
::end:: */
{
hash_value hv = value;
return rep_MAKE_INT (TRUNC (hv));
}
/* XXX This is probably _very_ sub-optimal.. */
DEFUN("equal-hash", Fequal_hash, Sequal_hash, (repv x, repv n_), rep_Subr2) /*
::doc:rep.data.tables#equal-hash::
equal-hash ARG
Return a positive fixnum somehow related to ARG, such that (equal X Y)
implies (= (equal-hash X) (equal-hash Y)).
::end:: */
{
int n = rep_INTP (n_) ? rep_INT (n_) : rep_PTR_SIZED_INT_BITS / 2;
if (rep_CONSP (x))
{
if (n > 0)
{
repv left = Fequal_hash (rep_CAR(x), rep_MAKE_INT (n/2));
repv right = Fequal_hash (rep_CDR(x), rep_MAKE_INT (n/2));
return rep_MAKE_INT ((rep_INT (left) << 1) + rep_INT (right));
}
else
return rep_MAKE_INT (rep_Cons);
}
else if (rep_VECTORP (x) || rep_COMPILEDP (x))
{
hash_value hash = 0;
int i = MIN (n, rep_VECT_LEN (x));
while (i-- > 0)
{
repv tem = Fequal_hash (rep_VECTI (x, i), rep_MAKE_INT (n/2));
hash = hash * 33 + rep_INT (tem);
}
return rep_MAKE_INT (TRUNC (hash));
}
else if (rep_STRINGP (x))
return Fstring_hash (x);
else if (rep_SYMBOLP (x))
return Fsymbol_hash (x);
else if (rep_INTP (x))
{
hash_value hash = rep_INT (x);
return rep_MAKE_INT (TRUNC (hash));
}
else if (rep_NUMBERP (x))
{
hash_value hash = rep_get_long_uint (x);
return rep_MAKE_INT (TRUNC (hash));
}
else
return rep_MAKE_INT (rep_TYPE (x) * 255);
}
/* table functions */
DEFUN("make-table", Fmake_table, Smake_table,
(repv hash_fun, repv cmp_fun, repv is_weak), rep_Subr3) /*
::doc:rep.data.tables#make-table::
make-table HASH-FUNCTION COMPARE-FUNCTION
Create and return a new hash table. When storing and referencing keys
it will use the function HASH-FUNCTION to map keys to hash codes
(positive fixnums), and the predicate function COMPARE-FUNCTION to
compare two keys (should return true if the keys are considered equal).
::end:: */
{
table *tab;
rep_DECLARE(1, hash_fun, Ffunctionp (hash_fun) != Qnil);
rep_DECLARE(2, cmp_fun, Ffunctionp (cmp_fun) != Qnil);
tab = rep_ALLOC_CELL (sizeof (table));
rep_data_after_gc += sizeof (table);
tab->car = table_type;
tab->next = all_tables;
all_tables = tab;
tab->hash_fun = hash_fun;
tab->compare_fun = cmp_fun;
tab->total_buckets = 0;
tab->total_nodes = 0;
tab->guardian = (is_weak == Qnil) ? rep_NULL : Fmake_primitive_guardian ();
return rep_VAL(tab);
}
DEFUN("make-weak-table", Fmake_weak_table, Smake_weak_table,
(repv hash_fun, repv cmp_fun), rep_Subr2) /*
::doc:rep.data.tables#make-weak-table::
make-weak-table HASH-FUNCTION COMPARE-FUNCTION
Similar to `make-table, except that key-value pairs stored in the table
are said to be ``weakly keyed''. That is, they are only retained in the
table as long the key has not been garbage collected.
Unlike with tables created by the `make-table function, the fact that
the key is stored in the table is not considered good enough to prevent
it being garbage collected.
::end:: */
{
return Fmake_table (hash_fun, cmp_fun, Qt);
}
DEFUN("tablep", Ftablep, Stablep, (repv arg), rep_Subr1) /*
::doc:rep.data.tables#tablep::
tablep ARG
Return true if ARG is a hash table.
::end:: */
{
return TABLEP(arg) ? Qt : Qnil;
}
static hash_value
hash_key (repv tab, repv key)
{
repv hash;
if (TABLE(tab)->hash_fun == rep_VAL(&Sstring_hash))
hash = Fstring_hash (key);
else if (TABLE(tab)->hash_fun == rep_VAL(&Ssymbol_hash))
hash = Fsymbol_hash (key);
else if (TABLE(tab)->hash_fun == rep_VAL(&Seq_hash))
hash = Feq_hash (key);
else if (TABLE(tab)->hash_fun == rep_VAL(&Sequal_hash))
hash = Fequal_hash (key, Qnil);
else
{
rep_GC_root gc_tab;
rep_PUSHGC (gc_tab, tab);
hash = rep_call_lisp1 (TABLE(tab)->hash_fun, key);
rep_POPGC;
}
return rep_INT(hash);
}
static inline int
hash_key_to_bin (repv tab, hash_value hash)
{
return hash % TABLE(tab)->total_buckets;
}
static inline rep_bool
compare (repv tab, repv val1, repv val2)
{
repv ret;
rep_GC_root gc_tab;
rep_PUSHGC (gc_tab, tab);
ret = rep_call_lisp2 (TABLE(tab)->compare_fun, val1, val2);
rep_POPGC;
return ret != Qnil;
}
static node *
lookup (repv tab, repv key)
{
hash_value hv;
node *ptr;
int index;
if (TABLE(tab)->total_buckets == 0)
return 0;
hv = hash_key (tab, key);
index = hash_key_to_bin (tab, hv);
for (ptr = TABLE(tab)->buckets[index]; ptr != 0; ptr = ptr->next)
{
if (ptr->hash == hv && compare (tab, key, ptr->key))
return ptr;
}
return 0;
}
DEFUN("table-ref", Ftable_ref, Stable_ref, (repv tab, repv key), rep_Subr2) /*
::doc:rep.data.tables#table-ref::
table-ref TABLE KEY
Return the value stored in hash table TABLE indexed by object KEY.
Returns false if no such value exists.
::end:: */
{
node *n;
rep_DECLARE1(tab, TABLEP);
n = lookup (tab, key);
return n ? n->value : Qnil;
}
DEFUN("table-bound-p", Ftable_bound_p,
Stable_bound_p, (repv tab, repv key), rep_Subr2) /*
::doc:rep.data.tables#table-bound-p::
table-bound-p TABLE KEY
Returns true if the hash table TABLE contains a value associated with
KEY.
::end:: */
{
node *n;
rep_DECLARE1(tab, TABLEP);
n = lookup (tab, key);
return n ? Qt : Qnil;
}
DEFUN("table-set", Ftable_set, Stable_set,
(repv tab, repv key, repv value), rep_Subr3) /*
::doc:rep.data.tables#table-set::
table-set TABLE KEY VALUE
Associate VALUE with KEY in hash table TABLE. Returns VALUE.
::end:: */
{
node *n;
rep_DECLARE1(tab, TABLEP);
n = lookup (tab, key);
if (n == 0)
{
int bin;
n = rep_alloc (sizeof (node));
rep_data_after_gc += sizeof (node);
n->key = key;
n->value = value;
n->hash = hash_key (tab, key);
TABLE(tab)->total_nodes++;
if (TABLE(tab)->total_nodes >= 2 * TABLE(tab)->total_buckets)
{
int old_size, new_size, i;
node **new_bins, **old_bins;
old_bins = TABLE(tab)->buckets;
old_size = TABLE(tab)->total_buckets;
/* The (misguided?) idea is to set number of buckets as
(2^N) - 1, then increase N each time we get twice as
many keys as buckets. Start at N=5 */
if (old_size == 0)
new_size = 31;
else
new_size = (old_size + 1) * 2 - 1;
new_bins = rep_alloc (sizeof (node *) * new_size);
rep_data_after_gc += sizeof (node *) * new_size;
memset (new_bins, 0, sizeof (node *) * new_size);
TABLE(tab)->buckets = new_bins;
TABLE(tab)->total_buckets = new_size;
for (i = 0; i < old_size; i++)
{
node *ptr, *next;
for (ptr = old_bins[i]; ptr != 0; ptr = next)
{
int index = hash_key_to_bin (tab, ptr->hash);
next = ptr->next;
ptr->next = new_bins[index];
new_bins[index] = ptr;
}
}
if (old_size > 0)
rep_free (old_bins);
}
bin = hash_key_to_bin (tab, n->hash);
n->next = TABLE(tab)->buckets[bin];
TABLE(tab)->buckets[bin] = n;
if (TABLE(tab)->guardian)
Fprimitive_guardian_push (TABLE(tab)->guardian, n->key);
}
n->value = value;
return value;
}
DEFUN("table-unset", Ftable_unset, Stable_unset,
(repv tab, repv key), rep_Subr2) /*
::doc:rep.data.tables#table-unset::
table-unset TABLE KEY
Remove any value stored in TABLE associated with KEY.
::end:: */
{
node *n;
rep_DECLARE1(tab, TABLEP);
n = lookup (tab, key);
if (n != 0)
{
int bin = hash_key_to_bin (tab, n->hash);
node **ptr;
for (ptr = &(TABLE(tab)->buckets[bin]);
*ptr != 0; ptr = &((*ptr)->next))
{
if (*ptr == n)
{
*ptr = n->next;
rep_free (n);
TABLE(tab)->total_nodes--;
return Qt;
}
}
}
return Qnil;
}
DEFUN("table-walk", Ftable_walk, Stable_walk,
(repv fun, repv tab), rep_Subr2) /*
::doc:rep.data.tables#table-walk::
table-walk FUNCTION TABLE
Call FUNCTION for every key-value pair stored in hash table TABLE. For
each pair, the function is called with arguments `(KEY VALUE)'.
::end:: */
{
rep_GC_root gc_tab, gc_fun;
int i;
rep_DECLARE1(tab, TABLEP);
rep_PUSHGC (gc_tab, tab);
rep_PUSHGC (gc_fun, fun);
for (i = 0; i < TABLE(tab)->total_buckets; i++)
{
node *n;
for (n = TABLE(tab)->buckets[i]; n != 0; n = n->next)
{
if (!rep_call_lisp2 (fun, n->key, n->value))
break;
}
}
rep_POPGC; rep_POPGC;
return rep_throw_value ? rep_NULL : Qnil;
}
DEFUN ("table-size", Ftable_size, Stable_size,
(repv tab), rep_Subr1) /*
::doc:rep.data.tables#table-size::
table-size TABLE
Returns the number of items currently stored in TABLE.
::end:: */
{
rep_DECLARE1 (tab, TABLEP);
return rep_make_long_int (TABLE (tab)->total_nodes);
}
DEFUN("tables-after-gc", Ftables_after_gc, Stables_after_gc, (void), rep_Subr0)
{
table *x;
for (x = all_tables; x != 0; x = x->next)
{
if (x->guardian)
{
repv key;
while ((key = Fprimitive_guardian_pop (x->guardian)) != Qnil)
{
rep_GC_root gc_key;
rep_PUSHGC (gc_key, key);
Ftable_unset (rep_VAL (x), key);
rep_POPGC;
}
}
}
return Qnil;
}
/* dl hooks */
repv
rep_dl_init (void)
{
repv tem;
table_type = rep_register_new_type ("table", 0, table_print, table_print,
table_sweep, table_mark,
0, 0, 0, 0, 0, 0, 0);
tem = Fsymbol_value (Qafter_gc_hook, Qt);
if (rep_VOIDP (tem))
tem = Qnil;
Fset (Qafter_gc_hook, Fcons (rep_VAL(&Stables_after_gc), tem));
tem = rep_push_structure ("rep.data.tables");
/* ::alias:tables rep.data.tables:: */
rep_alias_structure ("tables");
rep_ADD_SUBR(Smake_table);
rep_ADD_SUBR(Smake_weak_table);
rep_ADD_SUBR(Sstring_hash);
rep_ADD_SUBR(Ssymbol_hash);
rep_ADD_SUBR(Seq_hash);
rep_ADD_SUBR(Sequal_hash);
rep_ADD_SUBR(Stablep);
rep_ADD_SUBR(Stable_ref);
rep_ADD_SUBR(Stable_bound_p);
rep_ADD_SUBR(Stable_set);
rep_ADD_SUBR(Stable_unset);
rep_ADD_SUBR(Stable_walk);
rep_ADD_SUBR(Stable_size);
rep_ADD_INTERNAL_SUBR(Stables_after_gc);
return rep_pop_structure (tem);
}
syntax highlighted by Code2HTML, v. 0.9.1