/* File: hash_xsb.c
** Author(s): Ernie Johnson
** Contact: xsb-contact@cs.sunysb.edu
**
** Copyright (C) The Research Foundation of SUNY, 1986, 1993-1998
**
** XSB is free software; you can redistribute it and/or modify it under the
** terms of the GNU Library General Public License as published by the Free
** Software Foundation; either version 2 of the License, or (at your option)
** any later version.
**
** XSB is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
** FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for
** more details.
**
** You should have received a copy of the GNU Library General Public License
** along with XSB; if not, write to the Free Software Foundation,
** Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**
** $Id: hash_xsb.c,v 1.7 2002/08/28 19:25:47 lfcastro Exp $
**
*/
#include "xsb_config.h"
#include "xsb_debug.h"
#include <stdio.h>
#include <stdlib.h>
#include "auxlry.h"
#include "cell_xsb.h"
#include "hash_xsb.h"
#include "psc_xsb.h"
#include "flags_xsb.h"
/*
* The "Atom Table" is divided into two structures, based on the type
* of the information to be interned. Symbolic constants, structures,
* and predicates which are assigned Psc records are stored in the
* "symbol table", while simple strings are stored in the "string table."
* Both subtables are maintained as hash tables.
*/
Hash_Table symbol_table = {8191, 0, NULL};
Hash_Table string_table = {16381, 0, NULL};
/*
* Prime numbers which are close to powers of 2. Used for choosing
* the next size for a hash table.
*/
#define NUM_OF_PRIMES 16
static unsigned int primes[NUM_OF_PRIMES] =
{127, 257, 509, 1021, 2053, 4099, 8191, 16381, 32771, 65537, 131071,
262147, 524287, 1048573, 2097143, 4194301};
/*
* Return the next prime number greater than the number received.
* If no such prime number can be found, compute an approximate one.
*/
unsigned long next_prime(unsigned long some_int) {
byte i;
i = 0;
while ( (some_int >= primes[i]) && (i < NUM_OF_PRIMES) )
i++;
if (i < NUM_OF_PRIMES)
return (primes[i]);
else /* Ran out of prime numbers */
return (2 * some_int - 1);
}
/*
* Hash object on first 25 characters of its name, plus its arity.
* Return a bucket number in the range [0 .. (hash_table_size - 1)].
*/
unsigned long hash(char *obj_name, byte arity, unsigned long hash_table_size) {
unsigned long hashval, temp;
int i, j, k;
hashval = 0;
if (*obj_name != '\0')
for (k=0; k<10; k++) {
for (i = 4; i >= 0; i--) {
temp = 0;
for (j = 0; j < 5; j++) {
temp = (temp << i) + *obj_name;
obj_name++;
if (*obj_name == '\0') {
hashval = hashval + temp;
goto Done;
}
}
hashval = hashval + temp;
}
}
Done:
return ((hashval + arity) MOD hash_table_size);
}
/* unsigned long hash(char *obj_name, byte arity, unsigned long hash_table_size) { */
/* unsigned long hashval, temp; */
/* int i, j; */
/* hashval = 0; */
/* if (*obj_name != '\0') */
/* for (i = 4; i >= 0; i--) { */
/* temp = 0; */
/* for (j = 0; j < 5; j++) { */
/* temp = (temp << i) + *obj_name; */
/* obj_name++; */
/* if (*obj_name == '\0') { */
/* hashval = hashval + temp; */
/* goto Done; */
/* } */
/* } */
/* hashval = hashval + temp; */
/* } */
/* Done: */
/* return ((hashval + arity) MOD hash_table_size); */
/* } */
/*
* Increase the size of the Symbol Table and rehash each entry.
*/
void expand_symbol_table() {
Pair *new_table, *bucket_ptr, cur_pair, next_pair;
Psc cur_psc;
unsigned long index, new_size, new_index;
new_size = next_prime(symbol_table.size);
new_table = (Pair *)calloc(new_size, sizeof(void *));
for (bucket_ptr = (Pair *)symbol_table.table, index = 0;
index < symbol_table.size; bucket_ptr++, index++)
for (cur_pair = *bucket_ptr; cur_pair != NULL; cur_pair = next_pair) {
next_pair = pair_next(cur_pair);
cur_psc = pair_psc(cur_pair);
new_index = hash(get_name(cur_psc), get_arity(cur_psc), new_size);
pair_next(cur_pair) = new_table[new_index];
new_table[new_index] = cur_pair;
}
free((void *)symbol_table.table);
symbol_table.size = new_size;
symbol_table.table = (void **)new_table;
}
/*
* Increase the size of the String Table and rehash each entry.
*
* String Table entries have the form:
* +--------------------------+
* | Ptr_to_Next | String ... |
* +--------------------------+
*/
void expand_string_table() {
void **new_table, **bucket_ptr, **cur_entry, **next_entry;
char *string;
unsigned long index, new_size, new_index;
new_size = next_prime(string_table.size);
new_table = (void **)calloc(new_size, sizeof(void *));
for (bucket_ptr = string_table.table, index = 0;
index < string_table.size;
bucket_ptr++, index++)
for (cur_entry = (void **)*bucket_ptr;
cur_entry != NULL;
cur_entry = next_entry) {
next_entry = (void **)*cur_entry;
string = (char *)(cur_entry + 1);
new_index = hash(string, 0, new_size);
*cur_entry = new_table[new_index];
new_table[new_index] = (void *)cur_entry;
}
free((void *)string_table.table);
string_table.size = new_size;
string_table.table = new_table;
}
/*
* Collect and report statistics on the symbol and string tables:
* - number of used and unused buckets
* - first and last buckets containing objects
* - buckets which are most full
*/
void symbol_table_stats() {
unsigned long i, symbols, bucket_contains, used_buckets, unused_buckets,
fullest_bucket_size, fullest_bucket_num, last_index;
int first_index;
Pair pair_ptr;
symbols = used_buckets = unused_buckets = last_index = 0;
fullest_bucket_size = fullest_bucket_num = 0;
first_index = -1;
for (i = 0; i < symbol_table.size; i++) {
if (symbol_table.table[i] != NULL) {
if (first_index == -1)
first_index = i;
last_index = i;
used_buckets++;
bucket_contains = 0;
for (pair_ptr = (Pair)symbol_table.table[i]; pair_ptr != NULL;
pair_ptr = pair_next(pair_ptr)) {
symbols++;
bucket_contains++;
}
if (bucket_contains > fullest_bucket_size) {
fullest_bucket_size = bucket_contains;
fullest_bucket_num = i;
}
}
else
unused_buckets++;
}
printf("\nSymbol table statistics:");
printf("\n------------------------\n");
printf("Table Size:\t%lu\n", symbol_table.size);
printf("Total Symbols:\t%lu\n", symbols);
if (symbols != symbol_table.contains)
printf("Symbol count incorrect in 'symbol_table': %lu\n",
symbol_table.contains);
printf("\tused buckets:\t%lu (range: [%d, %lu])\n", used_buckets,
first_index, last_index);
printf("\tunused buckets:\t%lu\n", unused_buckets);
printf("\tfullest bucket:\t%lu (size: %lu)\n", fullest_bucket_num,
fullest_bucket_size);
}
void string_table_stats() {
unsigned long i, strings, bucket_contains, used_buckets, unused_buckets,
fullest_bucket_size, fullest_bucket_num, last_index;
int first_index;
void *ptr;
strings = used_buckets = unused_buckets = last_index = 0;
fullest_bucket_size = fullest_bucket_num = 0;
first_index = -1;
for (i = 0; i < string_table.size; i++) {
if (string_table.table[i] != NULL) {
if (first_index == -1)
first_index = i;
last_index = i;
used_buckets++;
bucket_contains = 0;
for (ptr = string_table.table[i]; ptr != NULL; ptr = *(void **)ptr) {
strings++;
bucket_contains++;
}
if (bucket_contains > fullest_bucket_size) {
fullest_bucket_size = bucket_contains;
fullest_bucket_num = i;
}
}
else
unused_buckets++;
}
printf("\nString table statistics:");
printf("\n------------------------\n");
printf("Table Size:\t%lu\n", string_table.size);
printf("Total Strings:\t%lu\n", strings);
if (strings != string_table.contains)
printf("String count incorrect in 'string_table': %lu\n",
string_table.contains);
printf("\tused buckets:\t%lu (range: [%d, %lu])\n", used_buckets,
first_index, last_index);
printf("\tunused buckets:\t%lu\n", unused_buckets);
printf("\tfullest bucket:\t%lu (size: %lu)\n\n", fullest_bucket_num,
fullest_bucket_size);
}
syntax highlighted by Code2HTML, v. 0.9.1