/* * Copyright (c) 2002, The Tendra Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * * Crown Copyright (c) 1997 * * This TenDRA(r) Computer Program is subject to Copyright * owned by the United Kingdom Secretary of State for Defence * acting through the Defence Evaluation and Research Agency * (DERA). It is made available to Recipients with a * royalty-free licence for its use, reproduction, transfer * to other parties and amendment for any purpose not excluding * product development provided that any such use et cetera * shall be deemed to be acceptance of the following conditions:- * * (1) Its Recipients shall ensure that this Notice is * reproduced upon any copies or amended versions of it; * * (2) Any amended version of it shall be clearly marked to * show both the nature of and the organisation responsible * for the relevant amendment or amendments; * * (3) Its onward transfer from a recipient to another * party shall be deemed to be that party's acceptance of * these conditions; * * (4) DERA gives no warranty or assurance as to its * quality or suitability for any purpose and DERA accepts * no liability whatsoever in relation to any use to which * it may be put. * * $TenDRA: tendra/src/producers/common/parse/hash.c,v 1.10 2005/08/04 20:22:17 stefanf Exp $ */ #include "config.h" #include "producer.h" #include "cstring.h" #include "fmm.h" #include "msgcat.h" #include "c_types.h" #include "ctype_ops.h" #include "etype_ops.h" #include "ftype_ops.h" #include "hashid_ops.h" #include "id_ops.h" #include "itype_ops.h" #include "member_ops.h" #include "type_ops.h" #include "error.h" #include "option.h" #include "basetype.h" #include "char.h" #include "chktype.h" #include "hash.h" #include "lex.h" #include "namespace.h" #include "symbols.h" #include "syntax.h" #include "token.h" #include "ustring.h" /* * HASH TABLES * * The hash tables consist of an array of hash identifiers, one for each * hash value. All the hash table entries with the same hash value are * chained into a list using their next field. There are two hash tables, * one for strings and one for types. */ HASHID *hash_table; static HASHID *hash_type_table; /* * IDENTIFIER NAME HASHING FUNCTION * * This routine calculates the hash value associated with the identifier * name s. The main parser routine, read_token, calculates the hash values * of the identifiers it reads on the fly and stores them in token_hash. * Therefore any changes to this routine should also be reflected in * read_token. */ unsigned long hash(string s) { character c; unsigned long h = 0; while (c = *(s++), c != 0) { h = HASH_POWER * h + (unsigned long) c; } return (h % HASH_SIZE); } /* * TYPE NAME HASHING FUNCTION * * This routine calculates the hash value associated with the type t. * This is used in the look-up for the conversion function identifier * 'operator t'. */ static unsigned long hash_type(TYPE t) { unsigned long h = 0; while (!IS_NULL_type (t)) { unsigned long sub = 0; unsigned long tag = (unsigned long) TAG_type (t); CV_SPEC qual = DEREF_cv (type_qual (t)); switch (tag) { case type_integer_tag : { INT_TYPE it = DEREF_itype (type_integer_rep (t)); if (IS_itype_basic (it)) { BUILTIN_TYPE no = DEREF_ntype (itype_basic_no (it)); sub = (unsigned long) no; } t = NULL_type; break; } case type_floating_tag : { FLOAT_TYPE ft = DEREF_ftype (type_floating_rep (t)); if (IS_ftype_basic (ft)) { BUILTIN_TYPE no = DEREF_ntype (ftype_basic_no (ft)); sub = (unsigned long) no; } t = NULL_type; break; } case type_ptr_tag : case type_ref_tag : { t = DEREF_type (type_ptr_etc_sub (t)); break; } case type_ptr_mem_tag : { CLASS_TYPE ct = DEREF_ctype (type_ptr_mem_of (t)); IDENTIFIER cid = DEREF_id (ctype_name (ct)); HASHID cnm = DEREF_hashid (id_name (cid)); sub = DEREF_ulong (hashid_hash (cnm)); t = DEREF_type (type_ptr_mem_sub (t)); break; } case type_func_tag : { LIST (TYPE) p = DEREF_list (type_func_ptypes (t)); sub = (unsigned long) LENGTH_list (p); t = DEREF_type (type_func_ret (t)); break; } case type_array_tag : { t = DEREF_type (type_array_sub (t)); break; } case type_bitfield_tag : { t = find_bitfield_type (t); break; } case type_compound_tag : { CLASS_TYPE ct = DEREF_ctype (type_compound_defn (t)); IDENTIFIER cid = DEREF_id (ctype_name (ct)); HASHID cnm = DEREF_hashid (id_name (cid)); sub = DEREF_ulong (hashid_hash (cnm)); t = NULL_type; break; } case type_enumerate_tag : { ENUM_TYPE et = DEREF_etype (type_enumerate_defn (t)); IDENTIFIER eid = DEREF_id (etype_name (et)); HASHID enm = DEREF_hashid (id_name (eid)); sub = DEREF_ulong (hashid_hash (enm)); t = NULL_type; break; } default : { t = NULL_type; break; } } h += (64 * sub + 4 * tag + (unsigned long) qual); } return (h % HASH_TYPE_SIZE); } /* * INITIALISE A HASH TABLE ENTRY * * This routine initialises the hash table entry nm by creating a dummy * identifier with lexical token value tok for it to point to. */ static void init_hashid(HASHID nm, int tok) { IDENTIFIER id; MAKE_id_dummy (nm, dspec_none, NULL_nspace, crt_loc, id); COPY_ulong (id_no (id), (unsigned long) tok); COPY_id (hashid_id (nm), id); COPY_id (hashid_cache (nm), id); return; } /* * LOOK UP AN IDENTIFIER NAME IN THE HASH TABLE * * This routine looks up the identifier name s in the hash table, creating * it if it does not already exist. h gives the value of hash (s) (which * gets checked by an assertion). The argument tok is set to lex_unknown to * indicate that the identifier has just been read by read_token, otherwise * it gives the underlying default lexical token. */ HASHID lookup_name(string s, unsigned long h, int ext, int tok) { unsigned tag; unsigned long len; HASHID prev = NULL_hashid; HASHID nm = hash_table [h]; ASSERT (h == hash (s)); /* Search through existing entries */ while (!IS_NULL_hashid (nm)) { string t = DEREF_string (hashid_name_etc_text (nm)); int c = ustrcmp (t, s); if (c == 0) { /* Name matches */ return (nm); } if (c > 0) break; prev = nm; nm = DEREF_hashid (hashid_next (nm)); } /* Create new hash table entry */ len = (unsigned long) ustrlen (s); if (tok == lex_unknown) { s = ustring_ncopy (s, (size_t) len); tok = lex_identifier; } tag = hashid_name_tag; if (ext) { /* Check for extended identifiers */ string t = s; while (t = ustrchr (t, char_backslash), t != NULL) { t++; if (*t == char_u) { /* '\uxxxx' counts as one character */ len -= 5; } else { /* '\Uxxxxxxxx' counts as one character */ len -= 9; } } tag = hashid_ename_tag; } MAKE_hashid_name_etc (tag, nm, h, s, nm); if (IS_NULL_hashid (prev)) { hash_table [h] = nm; } else { COPY_hashid (hashid_next (prev), nm); } init_hashid (nm, tok); if (len >= max_id_length) { /* Check name length */ IGNORE check_value (OPT_VAL_name_limit, len, nm); } return (nm); } /* * CREATE A SPECIAL FUNCTION NAME * * This routine creates a constructor, destructor or conversion function * name (as indicated by tag) for the non-class type t named id. */ static HASHID lookup_special(TYPE t, IDENTIFIER id, unsigned tag) { unsigned long h = hash_type (t); HASHID prev = hash_type_table [h]; HASHID nm = prev; /* Search through existing entries */ while (!IS_NULL_hashid (nm)) { if (TAG_hashid (nm) == tag) { TYPE s = DEREF_type (hashid_constr_etc_type (nm)); if (eq_type (s, t)) { COPY_id (hashid_constr_etc_tid (nm), id); return (nm); } } nm = DEREF_hashid (hashid_next (nm)); } /* Create new hash table entry */ ASSERT (h < HASH_SIZE); MAKE_hashid_constr_etc (tag, prev, h, t, id, nm); init_hashid (nm, lex_identifier); hash_type_table [h] = nm; return (nm); } /* * CREATE THE CONSTRUCTOR FOR A TYPE * * This routine creates the hash table entry for the constructor of type t * and name id. */ HASHID lookup_constr(TYPE t, IDENTIFIER id) { HASHID nm; if (IS_type_compound (t)) { CLASS_TYPE ct = DEREF_ctype (type_compound_defn (t)); IDENTIFIER cid = DEREF_id (ctype_constr (ct)); if (IS_NULL_id (cid)) { /* Create class constructor */ NAMESPACE ns = DEREF_nspace (ctype_member (ct)); MAKE_hashid_constr (NULL_hashid, 0, t, id, nm); init_hashid (nm, lex_identifier); cid = DEREF_id (hashid_id (nm)); COPY_nspace (id_parent (cid), ns); COPY_id (ctype_constr (ct), cid); IGNORE search_member (ns, nm, 1); } else { nm = DEREF_hashid (id_name (cid)); } } else { nm = lookup_special (t, id, hashid_constr_tag); } return (nm); } /* * CREATE THE DESTRUCTOR FOR A TYPE * * This routine creates the hash table entry for the destructor of type t * and name id. */ HASHID lookup_destr(TYPE t, IDENTIFIER id) { HASHID nm; if (IS_type_compound (t)) { CLASS_TYPE ct = DEREF_ctype (type_compound_defn (t)); IDENTIFIER cid = DEREF_id (ctype_destr (ct)); if (IS_NULL_id (cid)) { /* Create class destructor */ NAMESPACE ns = DEREF_nspace (ctype_member (ct)); MAKE_hashid_destr (NULL_hashid, 1, t, id, nm); init_hashid (nm, lex_identifier); cid = DEREF_id (hashid_id (nm)); COPY_nspace (id_parent (cid), ns); COPY_id (ctype_destr (ct), cid); IGNORE search_member (ns, nm, 1); } else { nm = DEREF_hashid (id_name (cid)); } } else { nm = lookup_special (t, id, hashid_destr_tag); } return (nm); } /* * LOOK UP A CONVERSION FUNCTION NAME * * This routine returns the hash table entry for the conversion function * corresponding to the type t. */ HASHID lookup_conv(TYPE t) { HASHID nm = lookup_special (t, NULL_id, hashid_conv_tag); return (nm); } /* * OVERLOADED OPERATOR LOOK-UP TABLE * * This table gives the hash table entries for the overloaded operator * function names, 'operator +' etc. It gives a straight look-up depending * on the lexical token number of the operator. All ISO keyword and * digraphs are represented in their primary form. */ HASHID *hash_ops_table; /* * CREATE AN OPERATOR HASH TABLE ENTRY * * This routine creates a hash table entry for 'operator op' when op has * lexical token number t. */ static HASHID make_op(int t) { HASHID nm; unsigned long h = (unsigned long) t; MAKE_hashid_op (NULL_hashid, (h % HASH_SIZE), t, nm); init_hashid (nm, lex_identifier); return (nm); } /* * LOOK UP AN ANONYMOUS IDENTIFIER * * This routine creates a hash table entry for an anonymous identifier. * Note that each anonymous identifier gives a distinct hash table entry. */ HASHID lookup_anon(void) { HASHID nm; static unsigned long anon_no = 0; unsigned long a = anon_no++; MAKE_hashid_anon (NULL_hashid, (a % HASH_SIZE), a, nm); init_hashid (nm, lex_identifier); return (nm); } /* * EXPAND AN IDENTIFIER NAME * * This routine expands the identifier name nm. For example, if t is * a tokenised type defined to be int, then 'operator t' expands to * 'operator int'. ct gives the expansion type for constructors and * destructors. */ HASHID expand_name(HASHID nm, CLASS_TYPE ct) { switch (TAG_hashid (nm)) { case hashid_constr_tag : { /* Constructor names */ if (!IS_NULL_ctype (ct)) { IDENTIFIER id = DEREF_id (ctype_constr (ct)); nm = DEREF_hashid (id_name (id)); } else { TYPE t = DEREF_type (hashid_constr_type (nm)); TYPE s = expand_type (t, 1); if (!EQ_type (t, s)) { nm = lookup_constr (s, NULL_id); } } break; } case hashid_destr_tag : { /* Destructor names */ if (!IS_NULL_ctype (ct)) { IDENTIFIER id = DEREF_id (ctype_destr (ct)); nm = DEREF_hashid (id_name (id)); } else { TYPE t = DEREF_type (hashid_destr_type (nm)); TYPE s = expand_type (t, 1); if (!EQ_type (t, s)) { nm = lookup_destr (s, NULL_id); } } break; } case hashid_conv_tag : { /* Conversion function names */ TYPE t = DEREF_type (hashid_conv_type (nm)); TYPE s = expand_type (t, 1); if (!EQ_type (t, s)) { nm = lookup_conv (s); } break; } } return (nm); } /* * FIND NEXT VERSION OF AN EXPANDED IDENTIFIER NAME * * There is a complication in the expansion of conversion function names * in that when types are identified more than one name may refer to the * same type. This routine finds the next such possible name returning * the null identifier name to indicate that there are no more. */ HASHID next_expand_name(HASHID nm) { if (IS_hashid_conv (nm)) { int started = 0; TYPE t = DEREF_type (hashid_conv_type (nm)); unsigned long h = hash_type (t); HASHID pnm = hash_type_table [h]; while (!IS_NULL_hashid (pnm)) { if (EQ_hashid (pnm, nm)) { started = 1; } else if (started && IS_hashid_conv (pnm)) { TYPE s = DEREF_type (hashid_conv_type (pnm)); if (eq_type (s, t)) return (pnm); } pnm = DEREF_hashid (hashid_next (pnm)); } } return (NULL_hashid); } /* * FIND A HASH IDENTIFIER NUMBER * * This routine finds the lexical token number associated with the hash * identifier nm. For a keyword, whether active or not, this is the * associated value from syntax.h, otherwise it is lex_identifier. */ int find_hashid(HASHID nm) { unsigned long lex; IDENTIFIER id = DEREF_id (hashid_id (nm)); while (!IS_id_dummy (id)) { /* Scan to last hidden value */ id = DEREF_id (id_alias (id)); } lex = DEREF_ulong (id_no (id)); return ((int) lex); } /* * FIND AN UNDERLYING IDENTIFIER * * This routine finds the dummy identifier underlying id. */ IDENTIFIER underlying_id(IDENTIFIER id) { HASHID nm = DEREF_hashid (id_name (id)); id = DEREF_id (hashid_id (nm)); while (!IS_id_dummy (id)) { /* Scan to last hidden value */ id = DEREF_id (id_alias (id)); } return (id); } /* * SET THE LOCATION OF A COMPLEX IDENTIFIER * * The precise location of the last use of a hash identifier is stored * in the loc field of its associated dummy identifier. For simple * identifiers this is set in read_token, however for more complex * cases this routine sets the location of id to be the location of pid. */ void set_hashid_loc(IDENTIFIER id, IDENTIFIER pid) { if (!IS_NULL_id (pid)) { LOCATION loc; if (!IS_id_dummy (id)) id = underlying_id (id); if (!IS_id_dummy (pid)) pid = underlying_id (pid); DEREF_loc (id_loc (pid), loc); COPY_loc (id_loc (id), loc); } return; } /* * MODIFY AN IDENTIFIER NAME * * This routine modifies the name of the identifier id by adding a prime * to it. This is intended primarily for debugging purposes. */ void prime_name(IDENTIFIER id) { if (!IS_NULL_id (id)) { HASHID nm = DEREF_hashid (id_name (id)); if (IS_hashid_name (nm)) { string s = DEREF_string (hashid_name_text (nm)); s = ustring_concat (s, ustrlit ("'")); nm = lookup_name (s, hash (s), 0, lex_identifier); } COPY_hashid (id_name (id), nm); } return; } /* * KEYWORD HASH TABLE ENTRIES * * The table hash_keyword gives the hash table entries for the keywords. * These are numbered from LAST_KEYWORD to FIRST_KEYWORD. The array * should be accessed through the macro KEYWORD defined in hash.h, which * includes the appropriate offset. */ HASHID hash_keyword [ LAST_KEYWORD - FIRST_KEYWORD + 1 ]; IDENTIFIER underlying_op = NULL_id; /* * INITIALISE THE HASH TABLE * * This routine allocates space for the hash table and sets all its entries * to NULL. It also sets up the operator look-up table. */ void init_hash(void) { int n; unsigned long i; /* Set up identifier hash table */ hash_table = xmalloc_nof (HASHID, HASH_SIZE); for (i = 0; i < HASH_SIZE; i++) { hash_table [i] = NULL_hashid; } /* Set up type hash table */ hash_type_table = xmalloc_nof (HASHID, HASH_TYPE_SIZE); for (i = 0; i < HASH_TYPE_SIZE; i++) { hash_type_table [i] = NULL_hashid; } /* Set up operator look-up table */ hash_ops_table = xmalloc_nof (HASHID, LAST_TOKEN + 1); for (n = 0; n <= LAST_TOKEN; n++) { hash_ops_table [n] = NULL_hashid; } /* Allocate hash table entries for all symbols */ for (n = FIRST_C_SYMBOL; n <= LAST_C_SYMBOL; n++) { hash_ops_table [n] = make_op (n); } for (n = FIRST_CPP_SYMBOL; n <= LAST_CPP_SYMBOL; n++) { hash_ops_table [n] = make_op (n); } for (n = FIRST_EXTRA_SYMBOL; n <= LAST_EXTRA_SYMBOL; n++) { hash_ops_table [n] = make_op (n); } hash_ops_table [ lex_array_Hop ] = make_op (lex_array_Hop); hash_ops_table [ lex_cond_Hop ] = make_op (lex_cond_Hop); hash_ops_table [ lex_delete ] = make_op (lex_delete); hash_ops_table [ lex_delete_Harray ] = make_op (lex_delete_Harray); hash_ops_table [ lex_func_Hop ] = make_op (lex_func_Hop); hash_ops_table [ lex_new ] = make_op (lex_new); hash_ops_table [ lex_new_Harray ] = make_op (lex_new_Harray); hash_ops_table [ lex_alignof ] = make_op (lex_alignof); hash_ops_table [ lex_sizeof ] = make_op (lex_sizeof); hash_ops_table [ lex_typeid ] = make_op (lex_typeid); hash_ops_table [ lex_vtable ] = make_op (lex_vtable); /* Map secondary representations to primary representations */ for (n = FIRST_DIGRAPH; n <= LAST_DIGRAPH; n++) { int m = primary_form (n); hash_ops_table [n] = hash_ops_table [m]; } for (n = FIRST_ISO_KEYWORD; n <= LAST_ISO_KEYWORD; n++) { int m = primary_form (n); hash_ops_table [n] = hash_ops_table [m]; } /* This is necessary for the definition of KEYWORD */ ASSERT (FIRST_KEYWORD == lex_auto); return; }