/*
* Copyright (c) 2002, The Tendra Project <http://www.ten15.org/>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*
* Crown Copyright (c) 1997
*
* This TenDRA(r) Computer Program is subject to Copyright
* owned by the United Kingdom Secretary of State for Defence
* acting through the Defence Evaluation and Research Agency
* (DERA). It is made available to Recipients with a
* royalty-free licence for its use, reproduction, transfer
* to other parties and amendment for any purpose not excluding
* product development provided that any such use et cetera
* shall be deemed to be acceptance of the following conditions:-
*
* (1) Its Recipients shall ensure that this Notice is
* reproduced upon any copies or amended versions of it;
*
* (2) Any amended version of it shall be clearly marked to
* show both the nature of and the organisation responsible
* for the relevant amendment or amendments;
*
* (3) Its onward transfer from a recipient to another
* party shall be deemed to be that party's acceptance of
* these conditions;
*
* (4) DERA gives no warranty or assurance as to its
* quality or suitability for any purpose and DERA accepts
* no liability whatsoever in relation to any use to which
* it may be put.
*
* $TenDRA: tendra/src/producers/common/parse/hash.c,v 1.10 2005/08/04 20:22:17 stefanf Exp $
*/
#include "config.h"
#include "producer.h"
#include "cstring.h"
#include "fmm.h"
#include "msgcat.h"
#include "c_types.h"
#include "ctype_ops.h"
#include "etype_ops.h"
#include "ftype_ops.h"
#include "hashid_ops.h"
#include "id_ops.h"
#include "itype_ops.h"
#include "member_ops.h"
#include "type_ops.h"
#include "error.h"
#include "option.h"
#include "basetype.h"
#include "char.h"
#include "chktype.h"
#include "hash.h"
#include "lex.h"
#include "namespace.h"
#include "symbols.h"
#include "syntax.h"
#include "token.h"
#include "ustring.h"
/*
* HASH TABLES
*
* The hash tables consist of an array of hash identifiers, one for each
* hash value. All the hash table entries with the same hash value are
* chained into a list using their next field. There are two hash tables,
* one for strings and one for types.
*/
HASHID *hash_table;
static HASHID *hash_type_table;
/*
* IDENTIFIER NAME HASHING FUNCTION
*
* This routine calculates the hash value associated with the identifier
* name s. The main parser routine, read_token, calculates the hash values
* of the identifiers it reads on the fly and stores them in token_hash.
* Therefore any changes to this routine should also be reflected in
* read_token.
*/
unsigned long
hash(string s)
{
character c;
unsigned long h = 0;
while (c = *(s++), c != 0) {
h = HASH_POWER * h + (unsigned long) c;
}
return (h % HASH_SIZE);
}
/*
* TYPE NAME HASHING FUNCTION
*
* This routine calculates the hash value associated with the type t.
* This is used in the look-up for the conversion function identifier
* 'operator t'.
*/
static unsigned long
hash_type(TYPE t)
{
unsigned long h = 0;
while (!IS_NULL_type (t)) {
unsigned long sub = 0;
unsigned long tag = (unsigned long) TAG_type (t);
CV_SPEC qual = DEREF_cv (type_qual (t));
switch (tag) {
case type_integer_tag : {
INT_TYPE it = DEREF_itype (type_integer_rep (t));
if (IS_itype_basic (it)) {
BUILTIN_TYPE no = DEREF_ntype (itype_basic_no (it));
sub = (unsigned long) no;
}
t = NULL_type;
break;
}
case type_floating_tag : {
FLOAT_TYPE ft = DEREF_ftype (type_floating_rep (t));
if (IS_ftype_basic (ft)) {
BUILTIN_TYPE no = DEREF_ntype (ftype_basic_no (ft));
sub = (unsigned long) no;
}
t = NULL_type;
break;
}
case type_ptr_tag :
case type_ref_tag : {
t = DEREF_type (type_ptr_etc_sub (t));
break;
}
case type_ptr_mem_tag : {
CLASS_TYPE ct = DEREF_ctype (type_ptr_mem_of (t));
IDENTIFIER cid = DEREF_id (ctype_name (ct));
HASHID cnm = DEREF_hashid (id_name (cid));
sub = DEREF_ulong (hashid_hash (cnm));
t = DEREF_type (type_ptr_mem_sub (t));
break;
}
case type_func_tag : {
LIST (TYPE) p = DEREF_list (type_func_ptypes (t));
sub = (unsigned long) LENGTH_list (p);
t = DEREF_type (type_func_ret (t));
break;
}
case type_array_tag : {
t = DEREF_type (type_array_sub (t));
break;
}
case type_bitfield_tag : {
t = find_bitfield_type (t);
break;
}
case type_compound_tag : {
CLASS_TYPE ct = DEREF_ctype (type_compound_defn (t));
IDENTIFIER cid = DEREF_id (ctype_name (ct));
HASHID cnm = DEREF_hashid (id_name (cid));
sub = DEREF_ulong (hashid_hash (cnm));
t = NULL_type;
break;
}
case type_enumerate_tag : {
ENUM_TYPE et = DEREF_etype (type_enumerate_defn (t));
IDENTIFIER eid = DEREF_id (etype_name (et));
HASHID enm = DEREF_hashid (id_name (eid));
sub = DEREF_ulong (hashid_hash (enm));
t = NULL_type;
break;
}
default : {
t = NULL_type;
break;
}
}
h += (64 * sub + 4 * tag + (unsigned long) qual);
}
return (h % HASH_TYPE_SIZE);
}
/*
* INITIALISE A HASH TABLE ENTRY
*
* This routine initialises the hash table entry nm by creating a dummy
* identifier with lexical token value tok for it to point to.
*/
static void
init_hashid(HASHID nm, int tok)
{
IDENTIFIER id;
MAKE_id_dummy (nm, dspec_none, NULL_nspace, crt_loc, id);
COPY_ulong (id_no (id), (unsigned long) tok);
COPY_id (hashid_id (nm), id);
COPY_id (hashid_cache (nm), id);
return;
}
/*
* LOOK UP AN IDENTIFIER NAME IN THE HASH TABLE
*
* This routine looks up the identifier name s in the hash table, creating
* it if it does not already exist. h gives the value of hash (s) (which
* gets checked by an assertion). The argument tok is set to lex_unknown to
* indicate that the identifier has just been read by read_token, otherwise
* it gives the underlying default lexical token.
*/
HASHID
lookup_name(string s, unsigned long h, int ext, int tok)
{
unsigned tag;
unsigned long len;
HASHID prev = NULL_hashid;
HASHID nm = hash_table [h];
ASSERT (h == hash (s));
/* Search through existing entries */
while (!IS_NULL_hashid (nm)) {
string t = DEREF_string (hashid_name_etc_text (nm));
int c = ustrcmp (t, s);
if (c == 0) {
/* Name matches */
return (nm);
}
if (c > 0) break;
prev = nm;
nm = DEREF_hashid (hashid_next (nm));
}
/* Create new hash table entry */
len = (unsigned long) ustrlen (s);
if (tok == lex_unknown) {
s = ustring_ncopy (s, (size_t) len);
tok = lex_identifier;
}
tag = hashid_name_tag;
if (ext) {
/* Check for extended identifiers */
string t = s;
while (t = ustrchr (t, char_backslash), t != NULL) {
t++;
if (*t == char_u) {
/* '\uxxxx' counts as one character */
len -= 5;
} else {
/* '\Uxxxxxxxx' counts as one character */
len -= 9;
}
}
tag = hashid_ename_tag;
}
MAKE_hashid_name_etc (tag, nm, h, s, nm);
if (IS_NULL_hashid (prev)) {
hash_table [h] = nm;
} else {
COPY_hashid (hashid_next (prev), nm);
}
init_hashid (nm, tok);
if (len >= max_id_length) {
/* Check name length */
IGNORE check_value (OPT_VAL_name_limit, len, nm);
}
return (nm);
}
/*
* CREATE A SPECIAL FUNCTION NAME
*
* This routine creates a constructor, destructor or conversion function
* name (as indicated by tag) for the non-class type t named id.
*/
static HASHID
lookup_special(TYPE t, IDENTIFIER id, unsigned tag)
{
unsigned long h = hash_type (t);
HASHID prev = hash_type_table [h];
HASHID nm = prev;
/* Search through existing entries */
while (!IS_NULL_hashid (nm)) {
if (TAG_hashid (nm) == tag) {
TYPE s = DEREF_type (hashid_constr_etc_type (nm));
if (eq_type (s, t)) {
COPY_id (hashid_constr_etc_tid (nm), id);
return (nm);
}
}
nm = DEREF_hashid (hashid_next (nm));
}
/* Create new hash table entry */
ASSERT (h < HASH_SIZE);
MAKE_hashid_constr_etc (tag, prev, h, t, id, nm);
init_hashid (nm, lex_identifier);
hash_type_table [h] = nm;
return (nm);
}
/*
* CREATE THE CONSTRUCTOR FOR A TYPE
*
* This routine creates the hash table entry for the constructor of type t
* and name id.
*/
HASHID
lookup_constr(TYPE t, IDENTIFIER id)
{
HASHID nm;
if (IS_type_compound (t)) {
CLASS_TYPE ct = DEREF_ctype (type_compound_defn (t));
IDENTIFIER cid = DEREF_id (ctype_constr (ct));
if (IS_NULL_id (cid)) {
/* Create class constructor */
NAMESPACE ns = DEREF_nspace (ctype_member (ct));
MAKE_hashid_constr (NULL_hashid, 0, t, id, nm);
init_hashid (nm, lex_identifier);
cid = DEREF_id (hashid_id (nm));
COPY_nspace (id_parent (cid), ns);
COPY_id (ctype_constr (ct), cid);
IGNORE search_member (ns, nm, 1);
} else {
nm = DEREF_hashid (id_name (cid));
}
} else {
nm = lookup_special (t, id, hashid_constr_tag);
}
return (nm);
}
/*
* CREATE THE DESTRUCTOR FOR A TYPE
*
* This routine creates the hash table entry for the destructor of type t
* and name id.
*/
HASHID
lookup_destr(TYPE t, IDENTIFIER id)
{
HASHID nm;
if (IS_type_compound (t)) {
CLASS_TYPE ct = DEREF_ctype (type_compound_defn (t));
IDENTIFIER cid = DEREF_id (ctype_destr (ct));
if (IS_NULL_id (cid)) {
/* Create class destructor */
NAMESPACE ns = DEREF_nspace (ctype_member (ct));
MAKE_hashid_destr (NULL_hashid, 1, t, id, nm);
init_hashid (nm, lex_identifier);
cid = DEREF_id (hashid_id (nm));
COPY_nspace (id_parent (cid), ns);
COPY_id (ctype_destr (ct), cid);
IGNORE search_member (ns, nm, 1);
} else {
nm = DEREF_hashid (id_name (cid));
}
} else {
nm = lookup_special (t, id, hashid_destr_tag);
}
return (nm);
}
/*
* LOOK UP A CONVERSION FUNCTION NAME
*
* This routine returns the hash table entry for the conversion function
* corresponding to the type t.
*/
HASHID
lookup_conv(TYPE t)
{
HASHID nm = lookup_special (t, NULL_id, hashid_conv_tag);
return (nm);
}
/*
* OVERLOADED OPERATOR LOOK-UP TABLE
*
* This table gives the hash table entries for the overloaded operator
* function names, 'operator +' etc. It gives a straight look-up depending
* on the lexical token number of the operator. All ISO keyword and
* digraphs are represented in their primary form.
*/
HASHID *hash_ops_table;
/*
* CREATE AN OPERATOR HASH TABLE ENTRY
*
* This routine creates a hash table entry for 'operator op' when op has
* lexical token number t.
*/
static HASHID
make_op(int t)
{
HASHID nm;
unsigned long h = (unsigned long) t;
MAKE_hashid_op (NULL_hashid, (h % HASH_SIZE), t, nm);
init_hashid (nm, lex_identifier);
return (nm);
}
/*
* LOOK UP AN ANONYMOUS IDENTIFIER
*
* This routine creates a hash table entry for an anonymous identifier.
* Note that each anonymous identifier gives a distinct hash table entry.
*/
HASHID
lookup_anon(void)
{
HASHID nm;
static unsigned long anon_no = 0;
unsigned long a = anon_no++;
MAKE_hashid_anon (NULL_hashid, (a % HASH_SIZE), a, nm);
init_hashid (nm, lex_identifier);
return (nm);
}
/*
* EXPAND AN IDENTIFIER NAME
*
* This routine expands the identifier name nm. For example, if t is
* a tokenised type defined to be int, then 'operator t' expands to
* 'operator int'. ct gives the expansion type for constructors and
* destructors.
*/
HASHID
expand_name(HASHID nm, CLASS_TYPE ct)
{
switch (TAG_hashid (nm)) {
case hashid_constr_tag : {
/* Constructor names */
if (!IS_NULL_ctype (ct)) {
IDENTIFIER id = DEREF_id (ctype_constr (ct));
nm = DEREF_hashid (id_name (id));
} else {
TYPE t = DEREF_type (hashid_constr_type (nm));
TYPE s = expand_type (t, 1);
if (!EQ_type (t, s)) {
nm = lookup_constr (s, NULL_id);
}
}
break;
}
case hashid_destr_tag : {
/* Destructor names */
if (!IS_NULL_ctype (ct)) {
IDENTIFIER id = DEREF_id (ctype_destr (ct));
nm = DEREF_hashid (id_name (id));
} else {
TYPE t = DEREF_type (hashid_destr_type (nm));
TYPE s = expand_type (t, 1);
if (!EQ_type (t, s)) {
nm = lookup_destr (s, NULL_id);
}
}
break;
}
case hashid_conv_tag : {
/* Conversion function names */
TYPE t = DEREF_type (hashid_conv_type (nm));
TYPE s = expand_type (t, 1);
if (!EQ_type (t, s)) {
nm = lookup_conv (s);
}
break;
}
}
return (nm);
}
/*
* FIND NEXT VERSION OF AN EXPANDED IDENTIFIER NAME
*
* There is a complication in the expansion of conversion function names
* in that when types are identified more than one name may refer to the
* same type. This routine finds the next such possible name returning
* the null identifier name to indicate that there are no more.
*/
HASHID
next_expand_name(HASHID nm)
{
if (IS_hashid_conv (nm)) {
int started = 0;
TYPE t = DEREF_type (hashid_conv_type (nm));
unsigned long h = hash_type (t);
HASHID pnm = hash_type_table [h];
while (!IS_NULL_hashid (pnm)) {
if (EQ_hashid (pnm, nm)) {
started = 1;
} else if (started && IS_hashid_conv (pnm)) {
TYPE s = DEREF_type (hashid_conv_type (pnm));
if (eq_type (s, t)) return (pnm);
}
pnm = DEREF_hashid (hashid_next (pnm));
}
}
return (NULL_hashid);
}
/*
* FIND A HASH IDENTIFIER NUMBER
*
* This routine finds the lexical token number associated with the hash
* identifier nm. For a keyword, whether active or not, this is the
* associated value from syntax.h, otherwise it is lex_identifier.
*/
int
find_hashid(HASHID nm)
{
unsigned long lex;
IDENTIFIER id = DEREF_id (hashid_id (nm));
while (!IS_id_dummy (id)) {
/* Scan to last hidden value */
id = DEREF_id (id_alias (id));
}
lex = DEREF_ulong (id_no (id));
return ((int) lex);
}
/*
* FIND AN UNDERLYING IDENTIFIER
*
* This routine finds the dummy identifier underlying id.
*/
IDENTIFIER
underlying_id(IDENTIFIER id)
{
HASHID nm = DEREF_hashid (id_name (id));
id = DEREF_id (hashid_id (nm));
while (!IS_id_dummy (id)) {
/* Scan to last hidden value */
id = DEREF_id (id_alias (id));
}
return (id);
}
/*
* SET THE LOCATION OF A COMPLEX IDENTIFIER
*
* The precise location of the last use of a hash identifier is stored
* in the loc field of its associated dummy identifier. For simple
* identifiers this is set in read_token, however for more complex
* cases this routine sets the location of id to be the location of pid.
*/
void
set_hashid_loc(IDENTIFIER id, IDENTIFIER pid)
{
if (!IS_NULL_id (pid)) {
LOCATION loc;
if (!IS_id_dummy (id)) id = underlying_id (id);
if (!IS_id_dummy (pid)) pid = underlying_id (pid);
DEREF_loc (id_loc (pid), loc);
COPY_loc (id_loc (id), loc);
}
return;
}
/*
* MODIFY AN IDENTIFIER NAME
*
* This routine modifies the name of the identifier id by adding a prime
* to it. This is intended primarily for debugging purposes.
*/
void
prime_name(IDENTIFIER id)
{
if (!IS_NULL_id (id)) {
HASHID nm = DEREF_hashid (id_name (id));
if (IS_hashid_name (nm)) {
string s = DEREF_string (hashid_name_text (nm));
s = ustring_concat (s, ustrlit ("'"));
nm = lookup_name (s, hash (s), 0, lex_identifier);
}
COPY_hashid (id_name (id), nm);
}
return;
}
/*
* KEYWORD HASH TABLE ENTRIES
*
* The table hash_keyword gives the hash table entries for the keywords.
* These are numbered from LAST_KEYWORD to FIRST_KEYWORD. The array
* should be accessed through the macro KEYWORD defined in hash.h, which
* includes the appropriate offset.
*/
HASHID hash_keyword [ LAST_KEYWORD - FIRST_KEYWORD + 1 ];
IDENTIFIER underlying_op = NULL_id;
/*
* INITIALISE THE HASH TABLE
*
* This routine allocates space for the hash table and sets all its entries
* to NULL. It also sets up the operator look-up table.
*/
void
init_hash(void)
{
int n;
unsigned long i;
/* Set up identifier hash table */
hash_table = xmalloc_nof (HASHID, HASH_SIZE);
for (i = 0; i < HASH_SIZE; i++) {
hash_table [i] = NULL_hashid;
}
/* Set up type hash table */
hash_type_table = xmalloc_nof (HASHID, HASH_TYPE_SIZE);
for (i = 0; i < HASH_TYPE_SIZE; i++) {
hash_type_table [i] = NULL_hashid;
}
/* Set up operator look-up table */
hash_ops_table = xmalloc_nof (HASHID, LAST_TOKEN + 1);
for (n = 0; n <= LAST_TOKEN; n++) {
hash_ops_table [n] = NULL_hashid;
}
/* Allocate hash table entries for all symbols */
for (n = FIRST_C_SYMBOL; n <= LAST_C_SYMBOL; n++) {
hash_ops_table [n] = make_op (n);
}
for (n = FIRST_CPP_SYMBOL; n <= LAST_CPP_SYMBOL; n++) {
hash_ops_table [n] = make_op (n);
}
for (n = FIRST_EXTRA_SYMBOL; n <= LAST_EXTRA_SYMBOL; n++) {
hash_ops_table [n] = make_op (n);
}
hash_ops_table [ lex_array_Hop ] = make_op (lex_array_Hop);
hash_ops_table [ lex_cond_Hop ] = make_op (lex_cond_Hop);
hash_ops_table [ lex_delete ] = make_op (lex_delete);
hash_ops_table [ lex_delete_Harray ] = make_op (lex_delete_Harray);
hash_ops_table [ lex_func_Hop ] = make_op (lex_func_Hop);
hash_ops_table [ lex_new ] = make_op (lex_new);
hash_ops_table [ lex_new_Harray ] = make_op (lex_new_Harray);
hash_ops_table [ lex_alignof ] = make_op (lex_alignof);
hash_ops_table [ lex_sizeof ] = make_op (lex_sizeof);
hash_ops_table [ lex_typeid ] = make_op (lex_typeid);
hash_ops_table [ lex_vtable ] = make_op (lex_vtable);
/* Map secondary representations to primary representations */
for (n = FIRST_DIGRAPH; n <= LAST_DIGRAPH; n++) {
int m = primary_form (n);
hash_ops_table [n] = hash_ops_table [m];
}
for (n = FIRST_ISO_KEYWORD; n <= LAST_ISO_KEYWORD; n++) {
int m = primary_form (n);
hash_ops_table [n] = hash_ops_table [m];
}
/* This is necessary for the definition of KEYWORD */
ASSERT (FIRST_KEYWORD == lex_auto);
return;
}
syntax highlighted by Code2HTML, v. 0.9.1