/* * Copyright (c) 2002, The Tendra Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * * Crown Copyright (c) 1997 * * This TenDRA(r) Computer Program is subject to Copyright * owned by the United Kingdom Secretary of State for Defence * acting through the Defence Evaluation and Research Agency * (DERA). 
It is made available to Recipients with a
 *    royalty-free licence for its use, reproduction, transfer
 *    to other parties and amendment for any purpose not excluding
 *    product development provided that any such use et cetera
 *    shall be deemed to be acceptance of the following conditions:-
 *
 *        (1) Its Recipients shall ensure that this Notice is
 *        reproduced upon any copies or amended versions of it;
 *
 *        (2) Any amended version of it shall be clearly marked to
 *        show both the nature of and the organisation responsible
 *        for the relevant amendment or amendments;
 *
 *        (3) Its onward transfer from a recipient to another
 *        party shall be deemed to be that party's acceptance of
 *        these conditions;
 *
 *        (4) DERA gives no warranty or assurance as to its
 *        quality or suitability for any purpose and DERA accepts
 *        no liability whatsoever in relation to any use to which
 *        it may be put.
 *
 * $TenDRA: tendra/src/producers/common/parse/macro.c,v 1.16 2004/08/21 15:31:42 stefanf Exp $
 */


#include "config.h"
#include "producer.h"
#include "cstring.h"
#include "fmm.h"
#include "msgcat.h"
#include "system.h"
#include "version.h"
#include "c_types.h"
#include "hashid_ops.h"
#include "id_ops.h"
#include "nspace_ops.h"
#include "error.h"
#include "catalog.h"
#include "option.h"
#include "buffer.h"
#include "char.h"
#include "dump.h"
#include "file.h"
#include "hash.h"
#include "lex.h"
#include "literal.h"
#include "macro.h"
#include "namespace.h"
#include "predict.h"
#include "preproc.h"
#include "print.h"
#include "symbols.h"
#include "syntax.h"
#include "ustring.h"


/*
 *    LIST OF FREE LEXICAL TOKENS
 *
 *    All the free lexical tokens are formed into a list.
 *
 *    free_tokens is the head of that list; tokens are chained through
 *    their next field.  new_pptok pops tokens from the head, and
 *    free_pptok and free_tok_list push tokens back onto it.
 *
 *    alloc_tokens records the base address of every batch of tokens
 *    obtained by new_pptok, so that term_macros can release the batches.
 */

PPTOKEN *free_tokens = NULL;
static LIST (PPTOKEN_P) alloc_tokens = NULL_list (PPTOKEN_P);


/*
 *    ALLOCATE A NEW TOKEN
 *
 *    This routine allocates a new token from the list free_tokens.
*/ PPTOKEN* new_pptok(void) { PPTOKEN *p = free_tokens; if (p == NULL) { PPTOKEN *q; int i, n = 2000; p = xmalloc_nof (PPTOKEN, n); CONS_pptok (p, alloc_tokens, alloc_tokens); q = p; for (i = 1; i < n; i++) { q->next = q + 1; q++; } q->next = NULL; } free_tokens = p->next; p->pp_opts = real_opts; return (p); } /* * FREE A SINGLE TOKEN * * This macro frees the single token P by adding it to the list of all * free tokens. */ #define free_pptok(P)\ {\ (P)->next = free_tokens;\ free_tokens = (P);\ } /* * FREE A LIST OF TOKENS * * This routine adds the list of tokens p to the list of all free tokens. */ void free_tok_list(PPTOKEN *p) { PPTOKEN *q = p; if (q == NULL) return; while (q->next) q = q->next; q->next = free_tokens; free_tokens = p; return; } /* * FREE ALL ALLOCATED PREPROCESSING TOKENS * * This routine frees all the space allocated for preprocessing tokens. * It should only be called after the input has been completely processed. */ void term_macros(void) { LIST (PPTOKEN_P) p = alloc_tokens; while (!IS_NULL_list (p)) { PPTOKEN *q; DESTROY_CONS_pptok (destroy, q, p, p); xfree (q); } alloc_tokens = p; free_tokens = NULL; return; } /* * COPY A TOKEN * * This macro copies the contents of the token with token value T and data * Q into P. */ #define copy_pptok(P, T, Q)\ {\ (P)->tok = (T);\ (P)->pp_data = (Q)->pp_data;\ (P)->pp_opts = (Q)->pp_opts;\ (P)->pp_space = (Q)->pp_space;\ } /* * ASSIGN TOKEN COMPONENTS * * This routine assigns the token components for the token t, which has * just been read from the input file (or faked on occasions - these are * indicated) into p. It is only necessary to call this routine is T is * less than or equal to LAST_COMPLEX_TOKEN (defined in symbols.h). If any * cases are added to this routine then it may be necessary to change the * value of this macro. 
*/ void token_parts(int t, PPTOKEN *p) { switch (t) { case lex_identifier : { /* Identifiers */ HASHID nm = token_hashid; IDENTIFIER id = DEREF_id (hashid_id (nm)); p->pp_data.id.hash = nm; p->pp_data.id.use = id; break; } case lex_char_Hlit : case lex_string_Hlit : case lex_wchar_Hlit : case lex_wstring_Hlit : { /* String and character literals */ string s1 = token_buff.start; size_t n = (size_t) (token_buff.posn - s1); string s2; if (n < 2) { /* Optimise for small strings */ s2 = ustring_copy (s1); } else { s2 = ustring_alloc (n + 1); xumemcpy (s2, s1, n); s2 [n] = 0; } p->pp_data.str.start = s2; p->pp_data.str.end = s2 + n; break; } case lex_integer_Hlit : { /* Integer and floating-point literals */ p->pp_data.text = ustring_copy (token_buff.start); break; } case lex_hash_Hif : case lex_hash_Helif : { /* Target dependent conditionals */ p->pp_data.exp = crt_hash_if_exp; break; } case lex_unknown : { /* Unknown characters */ int i; string s1 = token_buff.start; string s2 = p->pp_data.buff; ASSERT (MULTI_WIDTH <= sizeof (p->pp_data.buff)); for (i = 0; i < MULTI_WIDTH; i++) s2 [i] = s1 [i]; break; } } return; } /* * REMOVE ANY IGNORED TOKENS FROM A LIST * * This routine removes any ignored tokens from the list tok, returning * the result. */ PPTOKEN* clean_tok_list(PPTOKEN *toks) { unsigned long sp = 0; PPTOKEN p0, *p = &p0; PPTOKEN *q; p->next = toks; while (q = p->next, q != NULL) { if (q->tok == lex_ignore_token) { sp |= q->pp_space; p->next = q->next; free_pptok (q); q = p; } else { if (sp) { q->pp_space |= sp; sp = 0; } } p = q; } return (p0.next); } /* * READ A LINE OF TOKENS * * This routine reads the sequence of preprocessing tokens comprising a * preprocessing directive (for example, a macro definition). If t1 is * not lex_ignore_token then it is taken to be the first token in the * definition, similarly tn gives the last token. 
*/ PPTOKEN* read_line(int t1, int tn) { int t = t1; unsigned long sp = 0; PPTOKEN dummy_tok, *this_tok = &dummy_tok; if (t == lex_ignore_token) { t = read_token (); update_column (); if (in_preproc_dir) preproc_loc = crt_loc; } while (t != lex_newline && t != lex_eof) { this_tok->next = new_pptok (); this_tok = this_tok->next; this_tok->tok = t; if (t <= LAST_COMPLEX_TOKEN) token_parts (t, this_tok); this_tok->pp_space = (sp & WHITE_MASK); sp = skip_white (0); t = read_token (); update_column (); if (in_preproc_dir) preproc_loc = crt_loc; } if (tn != lex_ignore_token) { this_tok->next = new_pptok (); this_tok = this_tok->next; this_tok->tok = tn; token_parts (tn, this_tok); this_tok->pp_space = (sp & WHITE_MASK); } this_tok->next = NULL; if (in_preproc_dir) IGNORE skip_to_end (); return (dummy_tok.next); } /* * COPY A LIST OF TOKENS * * This routine copies the list of tokens toks, excluding any ignored * tokens. If no tokens at all were copied, a placemarker token is * inserted and *have_placemarkers is set to 1. */ static PPTOKEN* copy_tok_list(PPTOKEN *toks, int *have_placemarkers) { PPTOKEN *ptr_tok; PPTOKEN dummy_tok, *this_tok = &dummy_tok; for (ptr_tok = toks; ptr_tok != NULL; ptr_tok = ptr_tok->next) { int t = ptr_tok->tok; if (t != lex_ignore_token) { this_tok->next = new_pptok (); this_tok = this_tok->next; copy_pptok (this_tok, t, ptr_tok); } } if (this_tok == &dummy_tok) { this_tok->next = new_pptok (); this_tok = this_tok->next; this_tok->tok = lex_placemarker; *have_placemarkers = 1; } this_tok->next = NULL; return (dummy_tok.next); } /* * STRINGISE A LIST OF TOKENS * * This routine turns the list of tokens toks into a string. The result * is built up in token_buff. If esc is true then any '"' (or whatever * the value of quote is) and '\' characters in string and character * literals (including the initial and terminating quotes) are preceded * by a '\'. 
This routine is used in the implementation of the # operator,
 *    in macro #include directives and a couple of other preprocessing
 *    directives.  It returns 1 to indicate a valid string.
 *
 *    The result is judged invalid (0 is returned) if, after the text has
 *    been built, it contains an occurrence of the quote character which is
 *    not preceded by a backslash, or if it ends in an unpaired backslash.
 *    Ignored tokens are skipped.  A single space is emitted before any
 *    token, other than the first one output, which has white space flags
 *    set.  The text in token_buff is null terminated, with the buffer
 *    position left on the terminator.
 */

int
quote_tok_list(PPTOKEN *toks, int esc, int quote)
{
    int res = 1;
    string st, se;
    int started = 0;    /* set once the first token has been output */
    int escaped = 0;    /* used by the final validity scan */
    PPTOKEN *ptr_tok;
    character qo = (character) quote;
    BUFFER *bf = clear_buffer (&token_buff, NULL);

    /* Scan through tokens */
    for (ptr_tok = toks; ptr_tok != NULL; ptr_tok = ptr_tok->next) {
        character p, q;    /* literal prefix (0 for none) and quote */
        int t = ptr_tok->tok;
        if (t == lex_ignore_token) continue;

        /* Print initial space if necessary */
        if (ptr_tok->pp_space && started) bfputc (bf, char_space);

        /* Find the token name */
        switch (t) {

            case lex_identifier : {
                /* Identifiers */
                HASHID nm = ptr_tok->pp_data.id.hash;
                st = DEREF_string (hashid_name_etc_text (nm));
                bfputs (bf, st);
                break;
            }

            case lex_integer_Hlit : {
                /* Integer and floating-point literals */
                st = ptr_tok->pp_data.text;
                bfputs (bf, st);
                break;
            }

            case lex_char_Hlit : {
                /* Character literals */
                p = 0;
                q = char_single_quote;
                /*
                 * Shared output code for all four literal kinds; the
                 * other literal cases set p and q and jump here.
                 */
                string_label : {
                    st = ptr_tok->pp_data.str.start;
                    se = ptr_tok->pp_data.str.end;

                    /* Prefix and opening quote */
                    if (p) bfputc (bf, (int) p);
                    if (esc && q == qo) bfputc (bf, char_backslash);
                    bfputc (bf, (int) q);

                    /* Copy string */
                    while (st != se) {
                        character c = *(st++);
                        if (c == qo || c == char_backslash) {
                            /* Escaped characters */
                            if (esc) bfputc (bf, char_backslash);
                        }
                        bfputc (bf, (int) c);
                    }

                    /* Closing quote */
                    if (esc && q == qo) bfputc (bf, char_backslash);
                    bfputc (bf, (int) q);
                }
                break;
            }

            case lex_wchar_Hlit : {
                /* Wide character literals */
                p = char_L;
                q = char_single_quote;
                goto string_label;
            }

            case lex_string_Hlit : {
                /* String literals */
                p = 0;
                q = char_quote;
                goto string_label;
            }

            case lex_wstring_Hlit : {
                /* Wide string literals */
                p = char_L;
                q = char_quote;
                goto string_label;
            }

            case lex_unknown : {
                /* Unknown characters */
                unsigned long u;
                int ch = CHAR_SIMPLE;
                u = get_multi_char (ptr_tok->pp_data.buff, &ch);
                if (ch == CHAR_SIMPLE) {
                    bfputc (bf, (int) u);
                } else {
                    print_char (u, ch, 0, bf);
                }
                break;
            }

            case lex_macro_Harg : {
                /* Macro parameters */
                HASHID nm = ptr_tok->pp_data.par.hash;
                st = DEREF_string (hashid_name_etc_text (nm));
                bfputs (bf, st);
                break;
            }

            default : {
                /* Symbols */
                st = token_name (t);
                bfputs (bf, st);
                break;
            }
        }
        started = 1;
    }

    /* End of string (terminate, then step back over the terminator) */
    bfputc (bf, 0);
    bf->posn--;

    /* Check for legal strings */
    st = bf->start;
    se = bf->posn;
    while (st != se) {
        if (escaped) {
            /* A character following a backslash is always accepted */
            escaped = 0;
        } else {
            character c = *st;
            if (c == qo) res = 0;
            if (c == char_backslash) escaped = 1;
        }
        st++;
    }
    /* A trailing backslash would escape the closing quote */
    if (escaped) res = 0;
    return (res);
}


/*
 *    CONCATENATE TWO TOKENS
 *
 *    This routine concatenates the two tokens p and q into a single token.
 *    This is used to implement the ## operator.  If the result is a valid
 *    preprocessing token then p is overwritten by the result and 1 is
 *    returned.  Otherwise 0 is returned and the token values and data of
 *    p and q are left unchanged.
 *
 *    Note that the white space flags are adjusted before any checks are
 *    made: p receives the union of the two tokens' flags and q's flags
 *    are cleared, whether or not the concatenation succeeds.  When p is a
 *    placemarker the subsequent copy from q then leaves p with q's
 *    (cleared) flags.
 *
 *    The combinations recognised are: a placemarker with anything; two
 *    symbols whose spellings together spell another enabled symbol; a dot
 *    followed by a number not itself starting with a dot; two identifiers;
 *    an identifier followed by a number containing no '.', '+' or '-'; the
 *    identifier L followed by a character or string literal; and a number
 *    followed by an identifier, a number, a dot or ellipsis, or (if the
 *    number ends in e or E) a plus or minus sign.
 */

static int
concat_pptoks(PPTOKEN *p, PPTOKEN *q)
{
    int a = p->tok;
    int b = q->tok;
    unsigned long sa = p->pp_space;
    unsigned long sb = q->pp_space;
    p->pp_space = (sa | sb);
    q->pp_space = 0;

    /* Concatenation with a placemarker yields the other operand */
    if (a == lex_placemarker) {
        copy_pptok (p, b, q);
        return (1);
    } else if (b == lex_placemarker) {
        return (1);
    }

    if (a >= FIRST_SYMBOL && a <= LAST_SYMBOL) {
        if (b >= FIRST_SYMBOL && b <= LAST_SYMBOL) {
            /* Two symbols may combine to give another symbol */
            int c;
            /*
             * The combined spelling is assembled in token_buff.
             * NOTE(review): no capacity check is visible here -
             * presumably token_buff always has room for two symbol
             * spellings; confirm.
             */
            string s = token_buff.start;
            ustrcpy_v (s, token_name (a));
            ustrcpy_v (s + ustrlen (s), token_name (b));
            for (c = FIRST_SYMBOL; c <= LAST_SYMBOL; c++) {
                if (ustreq (s, token_name (c))) {
                    /* Token found - check options */
                    p->tok = c;
                    if (c >= FIRST_C_SYMBOL && c <= LAST_C_SYMBOL) {
                        return (1);
                    }
#if LANGUAGE_CPP
                    if (c >= FIRST_CPP_SYMBOL && c <= LAST_CPP_SYMBOL) {
                        return (1);
                    }
#endif
                    if (c >= FIRST_EXTRA_SYMBOL && c <= LAST_EXTRA_SYMBOL) {
                        if (allow_extra_symbols) return (1);
                    }
                    if (c >= FIRST_DIGRAPH && c <= LAST_DIGRAPH) {
                        if (allow_digraphs) return (1);
                    }
                    /* Symbol not enabled - restore p and keep looking */
                    p->tok = a;
                }
            }
            return (0);
        } else if (a == lex_dot && b == lex_integer_Hlit) {
            /* A dot may start a number */
            string s = q->pp_data.text;
            if (s [0] == char_dot) return (0);
            p->tok = lex_integer_Hlit;
            p->pp_data.text = ustring_concat (token_name (a), s);
            return (1);
        } else if (a == lex_backslash && b == lex_identifier) {
            /* A backslash may start a universal character */
            /* NOT YET IMPLEMENTED */
            /* EMPTY */
        }

    } else if (a == lex_identifier) {
        HASHID nm = p->pp_data.id.hash;
        string s = DEREF_string (hashid_name_etc_text (nm));
        if (b == lex_identifier) {
            /* Two identifiers give another identifier */
            HASHID nm2 = q->pp_data.id.hash;
            string s2 = DEREF_string (hashid_name_etc_text (nm2));
            s = ustring_concat (s, s2);
            nm = lookup_name (s, hash (s), 2, lex_identifier);
            p->pp_data.id.hash = nm;
            p->pp_data.id.use = DEREF_id (hashid_id (nm));
            return (1);
        } else if (b == lex_integer_Hlit) {
            /* An identifier and a number may give an identifier */
            character c;
            string n = q->pp_data.text;
            while (c = *(n++), c != 0) {
                if (c == char_dot || c == char_plus || c == char_minus) {
                    /* The number must be entirely alphanumeric */
                    return (0);
                }
            }
            s = ustring_concat (s, q->pp_data.text);
            nm = lookup_name (s, hash (s), 2, lex_identifier);
            p->pp_data.id.hash = nm;
            p->pp_data.id.use = DEREF_id (hashid_id (nm));
            return (1);
        } else if (s [0] == char_L && s [1] == 0) {
            /* An L may start a wide character or string */
            if (b == lex_char_Hlit) {
                p->tok = lex_wchar_Hlit;
                p->pp_data.str.start = q->pp_data.str.start;
                p->pp_data.str.end = q->pp_data.str.end;
                return (1);
            } else if (b == lex_string_Hlit) {
                p->tok = lex_wstring_Hlit;
                p->pp_data.str.start = q->pp_data.str.start;
                p->pp_data.str.end = q->pp_data.str.end;
                return (1);
            }
        }

    } else if (a == lex_integer_Hlit) {
        string s = p->pp_data.text;
        if (b == lex_identifier) {
            /* A number followed by an identifier is a number */
            HASHID nm = q->pp_data.id.hash;
            string s2 = DEREF_string (hashid_name_etc_text (nm));
            p->pp_data.text = ustring_concat (s, s2);
            return (1);
        } else if (b == lex_integer_Hlit) {
            /* Two numbers form another number */
            string s2 = q->pp_data.text;
            p->pp_data.text = ustring_concat (s, s2);
            return (1);
        } else if (b == lex_dot || b == lex_ellipsis) {
            /* A number followed by a sequence of dots is a number */
            p->pp_data.text = ustring_concat (s, token_name (b));
            return (1);
        } else if (b == lex_plus || b == lex_minus) {
            /* A sign may terminate a number after e or E */
            /* NOTE(review): assumes s is non-empty; for an empty
               spelling n would wrap round - confirm this cannot arise */
            unsigned n = (unsigned) ustrlen (s) - 1;
            if (s [n] == char_e || s [n] == char_E) {
                p->pp_data.text = ustring_concat (s, token_name (b));
                return (1);
            }
        }
    }
    return (0);
}


/*
 *    DUMMY LOCATION FOR INPUT FILE
 *
 *    This dummy location represents tokens read directly from the input file.
 *    If present, it will always be the last element of a list of token
 *    locations.  It is recognised by comparing a location pointer against
 *    file_loc; its token pointer is a placeholder which holds a null list.
 */

static PPTOKEN *dummy_loc_toks = NULL;
static TOKEN_LOC dummy_loc = { &dummy_loc_toks, NULL };
TOKEN_LOC *file_loc = &dummy_loc;


/*
 *    FORWARD DECLARATION
 *
 *    The functions expand_macro, expand_toks and expand_tok_list are defined
 *    recursively.  This gives the necessary forward declarations.
 */

static PPTOKEN *expand_toks(PPTOKEN *, TOKEN_LOC *, int);


/*
 *    HANDLE OLD STYLE STRINGISING
 *
 *    This routine handles the old style stringising for the definition defn
 *    for the given macro.  Argument replacement has already been performed
 *    on defn.  If this facility is enabled then in macro definitions of the
 *    form:
 *
 *        #define f(X) "X"
 *
 *    quotes are classified as unknown characters rather than string
 *    terminators.  This means that the X is recognised as a macro parameter
 *    and is replaced during argument replacement.  The job of this routine
 *    is to spot these unrecognised quotes and turn them into proper strings.
*/

/*
 *    Each unknown-character token in defn which is a simple single or
 *    double quote is taken as the start of a string; the matching closing
 *    quote is the next unknown-character token holding the same quote
 *    which is not preceded by a backslash token.  Any macro parameter
 *    token found while searching for the closing quote is reported.
 *
 *    If act is zero the list is not modified.  Otherwise the tokens
 *    between the quotes are stringised using quote_tok_list (without
 *    escaping) and freed, the opening quote token is overwritten by the
 *    resulting character or string literal, and, if the preceding token
 *    is the identifier L, the two are merged into a wide literal.  The
 *    head of the list, defn, is returned.
 */

PPTOKEN*
recognise_strings(PPTOKEN *defn, HASHID macro, int act)
{
    PPTOKEN *this_tok = defn;
    PPTOKEN *last_tok = defn;    /* token before this_tok (or defn) */
    while (this_tok != NULL) {
        if (this_tok->tok == lex_unknown) {
            unsigned long u;
            int ch = CHAR_SIMPLE;
            /* qo stays as '?' unless this is a simple character */
            character qo = char_question;
            u = get_multi_char (this_tok->pp_data.buff, &ch);
            if (ch == CHAR_SIMPLE) qo = (character) u;
            if (qo == char_quote || qo == char_single_quote) {
                /* Start of string */
                int t;
                int escaped = 0;
                PPTOKEN *next_tok = this_tok->next;
                PPTOKEN *ptr_tok = next_tok;

                /* Search for the matching closing quote */
                while (ptr_tok != NULL) {
                    t = ptr_tok->tok;
                    if (t == lex_macro_Harg) {
                        HASHID nm = ptr_tok->pp_data.par.hash;
                        ERROR err = ERR_cpp_stringize_old (nm, macro);
                        report (preproc_loc, err);
                    }
                    if (escaped) {
                        escaped = 0;
                    } else if (t == lex_unknown) {
                        character qc = char_question;
                        u = get_multi_char (ptr_tok->pp_data.buff, &ch);
                        if (ch == CHAR_SIMPLE) qc = (character) u;
                        if (qc == qo) break;
                        if (qc == char_backslash) escaped = 1;
                    }
                    ptr_tok = ptr_tok->next;
                }

                if (act) {
                    /*
                     * Detach the string body (next_tok onwards) from
                     * the main list.  The closing quote, if any, stays
                     * at the end of the detached list as an ignored
                     * token, so it is freed with the body.
                     */
                    if (ptr_tok == NULL) {
                        /* No closing quote */
                        report (crt_loc, ERR_cpp_stringize_bad (macro));
                        this_tok->next = NULL;
                    } else {
                        ptr_tok->tok = lex_ignore_token;
                        this_tok->next = ptr_tok->next;
                        ptr_tok->next = NULL;
                    }

                    /* Form the string */
                    if (!quote_tok_list (next_tok, 0, (int) qo)) {
                        report (crt_loc, ERR_cpp_stringize_bad (macro));
                    }
                    t = (qo == char_quote ? lex_string_Hlit : lex_char_Hlit);
                    /* The opening quote token becomes the literal,
                       taking its text from token_buff */
                    this_tok->tok = t;
                    token_parts (t, this_tok);
                    free_tok_list (next_tok);

                    /* Check for wide strings */
                    if (last_tok->tok == lex_identifier) {
                        string s;
                        HASHID nm = last_tok->pp_data.id.hash;
                        s = DEREF_string (hashid_name_etc_text (nm));
                        if (s [0] == char_L && s [1] == 0) {
                            if (t == lex_string_Hlit) {
                                t = lex_wstring_Hlit;
                            } else {
                                t = lex_wchar_Hlit;
                            }
                            /* Fold the literal into the L token and
                               drop the now redundant literal token */
                            copy_pptok (last_tok, t, this_tok);
                            last_tok->next = this_tok->next;
                            free_pptok (this_tok);
                            this_tok = last_tok;
                        }
                    }
                }
            }
        }
        last_tok = this_tok;
        this_tok = this_tok->next;
    }
    return (defn);
}


/*
 *    HANDLE TOKEN CONCATENATION
 *
 *    This routine handles any ## operators in the definition defn of the
 *    given macro.  Note that any initial or terminal ## operators have
 *    already been reported.
 *
 *    Leading ## tokens are discarded.  Each remaining ## is removed and the
 *    tokens on either side are combined using concat_pptoks; on success the
 *    right hand token is freed, on failure an error is reported and both
 *    tokens are left in the list.  The merged token is examined again, so
 *    that chains such as a ## b ## c are combined from left to right.  The
 *    head of the resulting list is returned.
 */

static PPTOKEN*
process_concat(PPTOKEN *defn, HASHID macro)
{
    PPTOKEN *this_tok;
    while (defn && defn->tok == lex_hash_Hhash_Hop) {
        /* Check for initial ## */
        this_tok = defn;
        defn = defn->next;
        free_pptok (this_tok);
    }
    this_tok = defn;
    while (this_tok != NULL) {
        PPTOKEN *next_tok = this_tok->next;
        if (next_tok == NULL) break;
        if (next_tok->tok == lex_hash_Hhash_Hop) {
            /* Delete the ## */
            this_tok->next = next_tok->next;
            free_pptok (next_tok);

            /* Check for terminal ## */
            if (this_tok->next == NULL) break;

            /* Do the token concatenation */
            if (concat_pptoks (this_tok, this_tok->next)) {
                /* Delete the second argument if successful */
                next_tok = this_tok->next;
                this_tok->next = next_tok->next;
                free_pptok (next_tok);
            } else {
                report (crt_loc, ERR_cpp_concat_bad (macro));
            }

            /* Now reprocess this_tok */
        } else {
            this_tok = next_tok;
        }
    }
    return (defn);
}


/*
 *    REMOVE PLACEMARKER TOKENS
 *
 *    This function removes all placemarker tokens after any ## operators
 *    were processed.
*/ static PPTOKEN * remove_placemarkers(PPTOKEN *defn) { PPTOKEN dummy_tok; PPTOKEN *last_tok = &dummy_tok, *this_tok; this_tok = dummy_tok.next = defn; while (this_tok != NULL) { if (this_tok->tok == lex_placemarker) { last_tok->next = this_tok->next; free_pptok (this_tok); this_tok = last_tok->next; } else { last_tok = this_tok; this_tok = this_tok->next; } } return (dummy_tok.next); } /* * MAXIMUM NUMBER OF MACRO PARAMETERS * * This macro defines the maximum number of macro parameters which * expand_macro can handle without having to allocate temporary space * to hold them. With allocation the number of parameters is unlimited. */ #define MAX_MACRO_PARAMS 256 /* * EXPAND A MACRO DEFINITION * * This routine expands the macro given by the hash table entry macro. * The argument locs gives a list of locations where macro arguments can * be read from. locs will never be NULL. The argument complete is true * to indicate that this is a complete macro expansion, and that any * argument errors should be reported. If locs contains file_loc then * complete will always be true. When reading from file_loc we always * set in_preproc_dir to 2 to make read_token return lex_eof at the end * of each file, rather than automatically reverting to the including * file, and to cause it to ignore any preprocessing directives. * * Note that the entry for the macro in the hash table is marked during * expansion to prevent recursive expansions. Several points concerning * macro expansion are undefined; in this implementation: * * 1. Firstly, # operators are evaluated from left to right; * 2. Secondly, ## operators are evaluated from left to right; * 3. If a ## b is not a valid preprocessing token then it is * resolved to a b; * 4. A # operator in a function-like macro which is not followed * by a macro argument is ignored (it is left as # in object-like * macros of course); * 5. A ## operator at the start or end of a macro is ignored; * 6. 
Any preprocessing directives in the macro arguments are treated
 *            as normal sequences of preprocessing tokens.
 *
 *    A further undefined area concerns the ban on recursive macro expansions.
 *    This is extended from the macro definition itself to any extra tokens
 *    which are read during the expansion of the macro definition.  For
 *    example, in:
 *
 *        #define f(a) a * g
 *        #define g(a) f (a)
 *        f (2) (9)
 *
 *    the result is '2 * f (9)', rather than '2 * 9 * g'.
 *
 *    The routine returns a newly built list of tokens.  This is NULL for
 *    an object-like macro with an empty definition, and a single identifier
 *    token naming the macro if a function-like macro is not followed by an
 *    open bracket or (when complete is false) its argument list is
 *    unterminated.  On a successful function-like expansion the token
 *    pointers held in locs are updated to reflect the tokens consumed by
 *    the argument list.
 */

PPTOKEN*
expand_macro(HASHID macro, TOKEN_LOC *locs, int complete)
{
    LOCATION loc;
    /* state: 0 = normal, 1 = previous token was #, 2 = previous was ## */
    int state = 0;
    PPTOKEN *defn;
    unsigned long sp = 0;
    unsigned no_pars = 0;
    int va_macro = 0;
    int have_unknown = 0;
    int have_hash_hash = 0;
    int have_placemarkers = 0;
    unsigned long ws = crt_spaces;
    PPTOKEN dummy_tok, *this_tok = &dummy_tok;
    /* Arguments are stored from index 1, hence the extra element */
    PPTOKEN *arg_array_base [ MAX_MACRO_PARAMS + 1 ];
    PPTOKEN **arg_array = arg_array_base;

    /* Get the macro identifier */
    IDENTIFIER id = DEREF_id (hashid_id (macro));
    unsigned tag = TAG_id (id);
    DECL_SPEC ds = DEREF_dspec (id_storage (id));

    /* Mark the macro as being used */
    loc = crt_loc;
    ds |= dspec_used;
    COPY_dspec (id_storage (id), ds);
    if (do_macro && do_usage) dump_use (id, &crt_loc, 1);

    /* Get macro definition and other data */
    if (tag == id_obj_macro_tag) {
        /* Object-like macros */
        defn = DEREF_pptok (id_obj_macro_defn (id));
        if (defn == NULL) return (NULL);
        if (ds & dspec_builtin) {
            /* Check built-in macros */
            int t = defn->tok;
            if (t == lex_builtin_Hline) {
                /* Construct an integer literal for __LINE__ */
                BUFFER *bf = clear_buffer (&token_buff, NULL);
                bfprintf (bf, "%lu", loc.line);
                bfputc (bf, 0);
                this_tok = new_pptok ();
                this_tok->tok = lex_integer_Hlit;
                this_tok->next = NULL;
                this_tok->pp_opts = NULL;
                this_tok->pp_space = 0;
                /* token_parts picks the text up from token_buff */
                token_parts (lex_integer_Hlit, this_tok);
                return (this_tok);
            }
            if (t == lex_builtin_Hfile) {
                /* Construct a string literal for __FILE__ */
                character c;
                string fn = DEREF_string (posn_file (crt_loc.posn));
                BUFFER *bf = clear_buffer (&token_buff, NULL);
                if (DEREF_int (posn_quote_file (crt_loc.posn))) {
                    while (c = *(fn++), c != 0) {
                        if (c == char_quote || c == char_backslash) {
                            /* Escape quotes and backslashes */
                            bfputc (bf, char_backslash);
                        }
                        bfputc (bf, (int) c);
                    }
                } else {
                    bfputs (bf, fn);
                }
                this_tok = new_pptok ();
                this_tok->tok = lex_string_Hlit;
                this_tok->next = NULL;
                this_tok->pp_opts = NULL;
                this_tok->pp_space = 0;
                /* token_parts picks the text up from token_buff */
                token_parts (lex_string_Hlit, this_tok);
                return (this_tok);
            }
        }

    } else {
        /* Function-like macros */
        int t;
        unsigned n;
        TOKEN_LOC *lc;
        int brackets = 0;        /* depth of nested brackets in argument */
        unsigned no_args = 0;    /* number of arguments read so far */
        PPTOKEN *ptr_tok = NULL; /* next token in the current location */
        TOKEN_LOC *ptr_loc = locs;

        /* Check for following open bracket */
        for (;;) {
            if (ptr_loc == file_loc) {
                /* Read token from input location */
                int legal = 1;
                sp = skip_white (1);
                if (peek_char (char_open_round, &legal)) {
                    /* Next token in file is '(' */
                    update_column ();
                    t = lex_open_Hround;
                } else {
                    /* Other cases */
                    t = lex_unknown;
                    if (sp) patch_white (sp);
                }
                break;
            } else if (ptr_loc == NULL) {
                /* No more locations */
                t = lex_eof;
                break;
            } else {
                /* Read token from current location */
                ptr_tok = (*(ptr_loc->toks))->next;
                while (ptr_tok && ptr_tok->tok == lex_ignore_token) {
                    /* Step over any ignored tokens */
                    ptr_tok = ptr_tok->next;
                }
                if (ptr_tok != NULL) {
                    /* Return the next token */
                    t = ptr_tok->tok;
                    ptr_tok = ptr_tok->next;
                    break;
                }
                /* Move on to next location */
                ptr_loc = ptr_loc->next;
            }
        }

        /* Next token is not an open bracket */
        if (t != lex_open_Hround) {
            if (complete) {
                report (loc, ERR_cpp_replace_arg_none (macro));
            }
            /* Also reached by goto from the unterminated argument
               list case below */
            incomplete_macro : {
                /* Return macro identifier */
                this_tok = new_pptok ();
                this_tok->tok = lex_identifier;
                this_tok->next = NULL;
                this_tok->pp_space = 0;
                this_tok->pp_data.id.hash = macro;
                this_tok->pp_data.id.use = id;
                return (this_tok);
            }
        }

        /* Check argument array size */
        no_pars = DEREF_unsigned (id_func_macro_no_params (id));
        if (no_pars > MAX_MACRO_PARAMS) {
            arg_array = xmalloc_nof (PPTOKEN *, no_pars + 1);
        }
        va_macro = DEREF_int (id_func_macro_va_macro (id));

        /* Scan macro arguments */
        for (;;) {
            /* Get the next token */
            int refill = 0;
            for (;;) {
                if (ptr_loc == file_loc) {
                    /* Read token from file location */
                    sp = skip_white (1);
                    in_preproc_dir = 2;
                    t = read_token ();
                    update_column ();
                    if (t == lex_hash_H1 || t == lex_hash_H2) {
                        if (sp & WHITE_NEWLINE) {
                            /* Looks like preprocessing directive */
                            ERROR err = ERR_cpp_replace_arg_ppdir (macro);
                            report (crt_loc, err);
                        }
                    }
                    break;
                } else if (ptr_loc == NULL) {
                    /* No more locations to read token from */
                    t = lex_eof;
                    break;
                } else {
                    /* Read token from next location */
                    if (refill) ptr_tok = (*(ptr_loc->toks))->next;
                    if (ptr_tok != NULL) {
                        t = ptr_tok->tok;
                        break;
                    }
                    ptr_loc = ptr_loc->next;
                    refill = 1;
                }
            }

            /* Examine this token */
            if (t == lex_open_Hround) {
                brackets++;
            } else if (t == lex_close_Hround) {
                /* Close brackets mark the end of the argument list */
                if (brackets == 0) break;
                brackets--;
            } else if (t == lex_comma &&
                       (!va_macro || no_args < no_pars - 1)) {
                /* Commas mark the end of an argument (for a variadic
                   macro, only until the last parameter is reached) */
                if (brackets == 0) {
                    this_tok->next = NULL;
                    no_args++;
                    if (dummy_tok.next) {
                        dummy_tok.next->pp_space = 0;
                    } else if (complete) {
                        ERROR err;
                        err = ERR_cpp_replace_arg_empty (no_args, macro);
                        report (crt_loc, err);

                        /* Insert a placemarker. */
                        dummy_tok.next = new_pptok ();
                        dummy_tok.next->tok = lex_placemarker;
                        dummy_tok.next->next = NULL;
                        have_placemarkers = 1;
                    }
                    /* Surplus arguments are counted but discarded */
                    if (no_args <= no_pars) {
                        arg_array [ no_args ] = dummy_tok.next;
                    } else {
                        free_tok_list (dummy_tok.next);
                    }
                    if (ptr_tok) ptr_tok = ptr_tok->next;
                    this_tok = &dummy_tok;
                    continue;
                }
            } else if (t == lex_eof) {
                break;
            }

            /* Build up current argument */
            this_tok->next = new_pptok ();
            this_tok = this_tok->next;
            if (ptr_tok) {
                /* Token came from a token list */
                copy_pptok (this_tok, t, ptr_tok);
                ptr_tok = ptr_tok->next;
            } else {
                /* Token was read from the input file */
                this_tok->tok = t;
                if (t <= LAST_COMPLEX_TOKEN) token_parts (t, this_tok);
                this_tok->pp_space = (sp & WHITE_MASK);
            }
        }

        /* Create last argument */
        in_preproc_dir = 0;
        this_tok->next = NULL;
        if (no_args || dummy_tok.next) {
            no_args++;
            if (dummy_tok.next) {
                dummy_tok.next->pp_space = 0;
            } else if (complete) {
                ERROR err = ERR_cpp_replace_arg_empty (no_args, macro);
                report (crt_loc, err);
            }
            if (no_args <= no_pars) {
                arg_array [ no_args ] = dummy_tok.next;
            } else {
                free_tok_list (dummy_tok.next);
            }
        }
        if (sp) patch_white (sp);
        this_tok = &dummy_tok;

        /* Check for incomplete argument lists */
        if (t == lex_eof) {
            if (complete) {
                /* Report error, but carry on */
                report (loc, ERR_cpp_replace_arg_eof (macro));
            } else {
                /* Free those arguments actually read */
                for (n = 1; n <= no_args && n <= no_pars; n++) {
                    free_tok_list (arg_array [n]);
                }
                if (arg_array != arg_array_base) xfree (arg_array);
                goto incomplete_macro;
            }
        }

        /* Update location pointers (locations before the one holding
           the close bracket have been read completely) */
        if (ptr_loc) *(ptr_loc)->toks = ptr_tok;
        for (lc = locs; lc != ptr_loc; lc = lc->next) {
            *(lc)->toks = NULL;
        }

        /* According to C99, an invocation 'MACRO()' should be
         * interpreted as having a single empty argument if MACRO
         * was defined with one parameter. */
        if (no_pars == 1 && no_args == 0) {
            report (crt_loc, ERR_cpp_replace_arg_empty (0, macro));
            arg_array [1] = NULL;
            no_args = 1;
            have_placemarkers = 1;
        }

        /* Check that argument and parameter lists match */
        if (no_pars != no_args && (!va_macro || no_pars > no_args)) {
            ERROR err;
            n = no_args;
            if (!va_macro) {
                err = ERR_cpp_replace_arg_number (macro, n, n, no_pars);
            } else {
                err = ERR_cpp_replace_arg_number2 (macro, n, n, no_pars);
            }
            report (crt_loc, err);

            /* Add extra arguments if there are not enough */
            for (n = no_args + 1; n <= no_pars; n++) {
                arg_array [n] = NULL;
            }
        }
        IGNORE check_value (OPT_VAL_macro_args, (ulong) no_args);

        /* Get the macro definition */
        defn = DEREF_pptok (id_func_macro_defn (id));
    }
    crt_spaces = ws;

    /* Copy the definition, expanding macro arguments */
    while (defn != NULL) {
        int t = defn->tok;

        if (t == lex_macro_Harg) {
            /* Macro argument - identified by argument number */
            unsigned long n = defn->pp_data.par.no;
            PPTOKEN *arg = arg_array [n];

            if (state == 0) {
                if (defn->next && defn->next->tok == lex_hash_Hhash_Hop) {
                    /* Preceding ##, just copy argument */
                    this_tok->next = copy_tok_list (arg, &have_placemarkers);
                } else {
                    /* Normal argument expansion */
                    TOKEN_LOC *arg_locs = NULL;
                    this_tok->next = expand_toks (arg, arg_locs, 0);
                }
            } else if (state == 1) {
                /* Following #, fake reading a string literal */
                this_tok->next = new_pptok ();
                if (!quote_tok_list (arg, 1, char_quote)) {
                    report (crt_loc, ERR_cpp_stringize_bad (macro));
                }
                this_tok->next->tok = lex_string_Hlit;
                token_parts (lex_string_Hlit, this_tok->next);
                this_tok->next->next = NULL;
                this_tok->next->pp_space = 0;
            } else {
                /* Following ##, just copy argument */
                this_tok->next = copy_tok_list (arg, &have_placemarkers);
            }
            /* The first replacement token takes the parameter's
               spacing; if there is none it is carried forward in sp */
            sp = defn->pp_space;
            if (sp && this_tok->next) {
                this_tok->next->pp_space = sp;
                sp = 0;
            }
            while (this_tok->next) this_tok = this_tok->next;
            state = 0;

        } else if (t == lex_hash_Hop) {
            /* Check for # operator */
            state = 1;

        } else if (t != lex_ignore_token) {
            /* Copy other tokens */
            this_tok->next = new_pptok ();
            this_tok = this_tok->next;
            copy_pptok (this_tok, t, defn);
            if (sp) {
                this_tok->pp_space = sp;
                sp = 0;
            }
            if (t == lex_hash_Hhash_Hop) {
                /* Check for ## operator */
                have_hash_hash = 1;
                state = 2;
            } else {
                if (t == lex_unknown) have_unknown = 1;
                state = 0;
            }
        }
        defn = defn->next;
    }
    this_tok->next = NULL;
    defn = dummy_tok.next;

    /* Allow for argument expansion in strings */
    if (have_unknown) defn = recognise_strings (defn, macro, 1);

    /* Rescan for ## directives */
    if (have_hash_hash) defn = process_concat (defn, macro);
    if (have_placemarkers) defn = remove_placemarkers (defn);

    /* Rescan for further expansion (but not expanding macro) */
    COPY_dspec (id_storage (id), (ds | dspec_temp));
    this_tok = expand_toks (defn, locs, complete);
    free_tok_list (defn);
    defn = this_tok;
    COPY_dspec (id_storage (id), ds);

    /* Clean up after macro expansion */
    if (tag == id_func_macro_tag) {
        /* Free the macro arguments */
        unsigned n;
        for (n = 1; n <= no_pars; n++) free_tok_list (arg_array [n]);
        if (arg_array != arg_array_base) xfree (arg_array);
    }

    /* Return the result */
    return (defn);
}


/*
 *    EXPAND A LIST OF TOKENS
 *
 *    This is the main macro expansion routine.  It expands the list of
 *    tokens toks, returning the result.  If toks ends in an unterminated
 *    function-like macro then further tokens may be read from the
 *    locations given in locs.  The complete argument is as in expand_macro.
*/ static PPTOKEN* expand_toks(PPTOKEN *toks, TOKEN_LOC *locs, int complete) { PPTOKEN *ptr_tok; unsigned long sp = 0; PPTOKEN dummy_tok, *this_tok = &dummy_tok; /* Copy list of tokens */ for (ptr_tok = toks; ptr_tok != NULL; ptr_tok = ptr_tok->next) { int t = ptr_tok->tok; if (t == lex_ignore_token) { sp |= ptr_tok->pp_space; continue; } this_tok->next = new_pptok (); this_tok = this_tok->next; copy_pptok (this_tok, t, ptr_tok); if (sp) { this_tok->pp_space |= sp; sp = 0; } /* Check for macros */ if (t == lex_identifier) { HASHID m = ptr_tok->pp_data.id.hash; IDENTIFIER id = DEREF_id (hashid_id (m)); unsigned tag = TAG_id (id); switch (tag) { case id_obj_macro_tag : case id_func_macro_tag : { DECL_SPEC ds; TOKEN_LOC tloc; /* Check for non-expanding tokens */ if (IS_NULL_id (this_tok->pp_data.id.use)) { break; } /* Check for recursive macro definitions */ ds = DEREF_dspec (id_storage (id)); if (ds & dspec_temp) { /* Mark this token as non-expanding */ ERROR err = ERR_cpp_rescan_recursive (m); report (crt_loc, err); this_tok->pp_data.id.use = NULL_id; break; } /* Expand the macro using an extra location */ tloc.toks = &ptr_tok; tloc.next = locs; this_tok->tok = lex_ignore_token; this_tok->next = expand_macro (m, &tloc, complete); while (this_tok->next) this_tok = this_tok->next; break; } } if (ptr_tok == NULL) break; } } this_tok->next = NULL; return (dummy_tok.next); } /* * EXPAND A SIMPLE LIST OF TOKENS * * This routine is the simplest form of expand_toks, where toks is a * complete list, with no locations for reading further tokens. */ PPTOKEN* expand_tok_list(PPTOKEN *toks) { return (expand_toks (toks, NULL, 1)); } /* * ASSERTION NAMESPACE * * The assertions occupy a namespace distinct from all other namespaces, * including the macro namespace. */ NAMESPACE assert_namespace; /* * CREATE A BUILT-IN MACRO * * This routine creates a built-in macro named nm defined by a single * preprocessing token with token type t and associated data d. 
*/

static void
builtin_macro(const char *nm, int t, const char *d)
{
    IDENTIFIER id;
    IDENTIFIER prev;
    HASHID macro;
    PPTOKEN *p;
    string name;
    unsigned long code;
    DECL_SPEC ds = (dspec_defn | dspec_builtin);

    /* Nothing is defined when no associated data is supplied */
    if (d == NULL) return;

    /* Look up the macro name and note the identifier it currently has */
    name = ustrlit (nm);
    code = hash (name);
    macro = lookup_name (name, code, 0, lex_identifier);
    prev = DEREF_id (hashid_id (macro));

    /* Build the single token forming the definition */
    p = new_pptok ();
    p->tok = t;
    p->pp_space = 0;
    p->pp_opts = NULL;
    p->next = NULL;
    switch (t) {
        case lex_integer_Hlit : {
            /* Integer literal: the token text is a copy of d */
            string text = ustrlit (string_copy (d));
            p->pp_data.text = text;
            break;
        }
        case lex_string_Hlit : {
            /* String literal: record both ends of a copy of d */
            string text = ustrlit (string_copy (d));
            p->pp_data.str.start = text;
            p->pp_data.str.end = text + ustrlen (text);
            break;
        }
        case lex_builtin_Hline :
        case lex_builtin_Hfile : {
            /* Location tokens: record the current location */
            p->pp_space = crt_loc.column;
            p->pp_data.loc.line = crt_loc.line;
            p->pp_data.loc.posn = crt_loc.posn;
            break;
        }
        default : {
            /* Other token types carry no associated data */
            break;
        }
    }

    /* Create the object-like macro and make it the meaning of the name */
    MAKE_id_obj_macro (macro, ds, NULL_nspace, crt_loc, p, id);
    COPY_id (id_alias (id), prev);
    COPY_id (hashid_id (macro), id);
    if (do_macro) dump_declare (id, &crt_loc, 1);
}


/*
 * INITIALISE BUILT-IN MACROS
 *
 * This routine initialises the built-in macros, and sets up the assertion
 * namespace.
*/

void
init_macros(int m, int a)
{
    /* The date and time strings are always computed, even if unused */
    const char *date_text = find_date ("%s %2d %d");
    const char *time_text = find_time ("%.2d:%.2d:%.2d");
#if LANGUAGE_CPP
    const char *tcpplus_text = "1";
#else
    const char *tcpplus_text = "0";
#endif

    /* Built-in macros are only defined if m is nonzero */
    if (m) {
        builtin_macro ("__LINE__", lex_builtin_Hline, "1");
        builtin_macro ("__FILE__", lex_builtin_Hfile, "");
        builtin_macro ("__DATE__", lex_string_Hlit, date_text);
        builtin_macro ("__TIME__", lex_string_Hlit, time_text);
        builtin_macro ("__STDC__", lex_integer_Hlit, C_VERSION);
        builtin_macro ("__STDC_VERSION__", lex_integer_Hlit, ISOC_VERSION);
#if LANGUAGE_CPP
        builtin_macro ("__cplusplus", lex_integer_Hlit, CPP_VERSION);
#endif
        builtin_macro ("__tcpplus", lex_integer_Hlit, tcpplus_text);
    }

    /* The assertion namespace is created regardless of m and a */
    assert_namespace = make_global_nspace ("", 20);

    /* Built-in assertions are only defined if a is nonzero */
    if (a) {
        IGNORE make_assert (KEYWORD (lex_include), lex_include);
        IGNORE make_assert (KEYWORD (lex_keyword), lex_keyword);
        IGNORE make_assert (KEYWORD (lex_option), lex_option);
    }
}