ports//lang/TenDRA/work/tendra/src/producers/common/parse/macro.c

/*
 * Copyright (c) 2002, The Tendra Project <http://www.ten15.org/>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *
 *    		 Crown Copyright (c) 1997
 *    
 *    This TenDRA(r) Computer Program is subject to Copyright
 *    owned by the United Kingdom Secretary of State for Defence
 *    acting through the Defence Evaluation and Research Agency
 *    (DERA).  It is made available to Recipients with a
 *    royalty-free licence for its use, reproduction, transfer
 *    to other parties and amendment for any purpose not excluding
 *    product development provided that any such use et cetera
 *    shall be deemed to be acceptance of the following conditions:-
 *    
 *        (1) Its Recipients shall ensure that this Notice is
 *        reproduced upon any copies or amended versions of it;
 *    
 *        (2) Any amended version of it shall be clearly marked to
 *        show both the nature of and the organisation responsible
 *        for the relevant amendment or amendments;
 *    
 *        (3) Its onward transfer from a recipient to another
 *        party shall be deemed to be that party's acceptance of
 *        these conditions;
 *    
 *        (4) DERA gives no warranty or assurance as to its
 *        quality or suitability for any purpose and DERA accepts
 *        no liability whatsoever in relation to any use to which
 *        it may be put.
 *
 * $TenDRA: tendra/src/producers/common/parse/macro.c,v 1.16 2004/08/21 15:31:42 stefanf Exp $
 */


#include "config.h"
#include "producer.h"

#include "cstring.h"
#include "fmm.h"
#include "msgcat.h"

#include "system.h"
#include "version.h"
#include "c_types.h"
#include "hashid_ops.h"
#include "id_ops.h"
#include "nspace_ops.h"
#include "error.h"
#include "catalog.h"
#include "option.h"
#include "buffer.h"
#include "char.h"
#include "dump.h"
#include "file.h"
#include "hash.h"
#include "lex.h"
#include "literal.h"
#include "macro.h"
#include "namespace.h"
#include "predict.h"
#include "preproc.h"
#include "print.h"
#include "symbols.h"
#include "syntax.h"
#include "ustring.h"


/*
 *    LIST OF FREE LEXICAL TOKENS
 *
 *    All the free lexical tokens are formed into a list.
 */

/* Head of the free list: new_pptok pops tokens from it, while free_pptok
   and free_tok_list push tokens back onto it */
PPTOKEN *free_tokens = NULL;
/* Every array obtained from xmalloc_nof in new_pptok is recorded here so
   that term_macros can xfree it later */
static LIST (PPTOKEN_P) alloc_tokens = NULL_list (PPTOKEN_P);


/*
 *    ALLOCATE A NEW TOKEN
 *
 *    This routine allocates a new token from the list free_tokens.
 */

PPTOKEN*
new_pptok(void)
{
    PPTOKEN *tok = free_tokens;
    if (tok == NULL) {
		/* The free list is exhausted: allocate a fresh array of tokens,
		   remember it in alloc_tokens and thread its elements together */
		int count = 2000;
		int idx;
		PPTOKEN *chunk = xmalloc_nof (PPTOKEN, count);
		CONS_pptok (chunk, alloc_tokens, alloc_tokens);
		for (idx = 0; idx < count - 1; idx++) {
			chunk [idx].next = chunk + idx + 1;
		}
		chunk [count - 1].next = NULL;
		tok = chunk;
    }

    /* Unlink the first free token; only pp_opts is initialised here, all
       other components are left for the caller to fill in */
    free_tokens = tok->next;
    tok->pp_opts = real_opts;
    return (tok);
}


/*
 *    FREE A SINGLE TOKEN
 *
 *    This macro frees the single token P by pushing it onto the front of
 *    the list of all free tokens, free_tokens, from which new_pptok takes
 *    its tokens.  No storage is released.
 *
 *    P is evaluated twice, so it should not have side effects.  The macro
 *    expands to a brace-enclosed block, so an invocation followed by a
 *    semicolon forms two statements; it cannot be used as the unbraced
 *    body of an if statement which has an else clause.
 */

#define free_pptok(P)\
    {\
	(P)->next = free_tokens;\
	free_tokens = (P);\
    }


/*
 *    FREE A LIST OF TOKENS
 *
 *    This routine adds the list of tokens p to the list of all free tokens.
 */

void
free_tok_list(PPTOKEN *p)
{
    PPTOKEN *tail;

    /* An empty list contributes nothing */
    if (p == NULL) return;

    /* Find the last token of the list ... */
    for (tail = p; tail->next != NULL; tail = tail->next) {
		/* EMPTY */
    }

    /* ... and splice the whole list onto the front of the free list */
    tail->next = free_tokens;
    free_tokens = p;
}


/*
 *    FREE ALL ALLOCATED PREPROCESSING TOKENS
 *
 *    This routine frees all the space allocated for preprocessing tokens.
 *    It should only be called after the input has been completely processed.
 */

void
term_macros(void)
{
    LIST (PPTOKEN_P) p = alloc_tokens;
    while (!IS_NULL_list (p)) {
		PPTOKEN *q;
		DESTROY_CONS_pptok (destroy, q, p, p);
		xfree (q);
    }
    alloc_tokens = p;
    free_tokens = NULL;
    return;
}


/*
 *    COPY A TOKEN
 *
 *    This macro sets the token value of P to T and copies the pp_data,
 *    pp_opts and pp_space components of the token Q into P.  The next
 *    field of P is not modified, so P keeps its position in its list.
 *
 *    P and Q are each evaluated more than once, so they should not have
 *    side effects.  The macro expands to a brace-enclosed block, so an
 *    invocation followed by a semicolon forms two statements.
 */

#define copy_pptok(P, T, Q)\
    {\
	(P)->tok = (T);\
	(P)->pp_data = (Q)->pp_data;\
	(P)->pp_opts = (Q)->pp_opts;\
	(P)->pp_space = (Q)->pp_space;\
    }


/*
 *    ASSIGN TOKEN COMPONENTS
 *
 *    This routine assigns the token components for the token t, which has
 *    just been read from the input file (or faked on occasions - these are
 *    indicated) into p.  It is only necessary to call this routine if t is
 *    less than or equal to LAST_COMPLEX_TOKEN (defined in symbols.h).  If any
 *    cases are added to this routine then it may be necessary to change the
 *    value of this macro.
 */

void
token_parts(int t, PPTOKEN *p)
{
    if (t == lex_identifier) {
		/* Identifiers: record the hash table entry held in token_hashid
		   together with the identifier it dereferences to */
		HASHID nm = token_hashid;
		IDENTIFIER id = DEREF_id (hashid_id (nm));
		p->pp_data.id.hash = nm;
		p->pp_data.id.use = id;

    } else if (t == lex_char_Hlit || t == lex_string_Hlit ||
			   t == lex_wchar_Hlit || t == lex_wstring_Hlit) {
		/* String and character literals: take a private copy of the
		   text between the start and the current position of token_buff */
		string src = token_buff.start;
		size_t len = (size_t) (token_buff.posn - src);
		string dst;
		if (len >= 2) {
			dst = ustring_alloc (len + 1);
			xumemcpy (dst, src, len);
			dst [len] = 0;
		} else {
			/* Optimise for small strings */
			dst = ustring_copy (src);
		}
		p->pp_data.str.start = dst;
		p->pp_data.str.end = dst + len;

    } else if (t == lex_integer_Hlit) {
		/* Integer and floating-point literals: copy the spelling */
		p->pp_data.text = ustring_copy (token_buff.start);

    } else if (t == lex_hash_Hif || t == lex_hash_Helif) {
		/* Target dependent conditionals: record the current expression */
		p->pp_data.exp = crt_hash_if_exp;

    } else if (t == lex_unknown) {
		/* Unknown characters: copy the MULTI_WIDTH character encoding */
		string src = token_buff.start;
		string dst = p->pp_data.buff;
		int k = 0;
		ASSERT (MULTI_WIDTH <= sizeof (p->pp_data.buff));
		while (k < MULTI_WIDTH) {
			dst [k] = src [k];
			k++;
		}
    }

    /* All other token values carry no extra data */
}


/*
 *    REMOVE ANY IGNORED TOKENS FROM A LIST
 *
 *    This routine removes any ignored tokens from the list toks, returning
 *    the result.
 */

PPTOKEN*
clean_tok_list(PPTOKEN *toks)
{
    /* link always addresses the pointer which leads to the token under
       examination, so that the token can be unlinked in place */
    PPTOKEN **link = &toks;
    unsigned long pending = 0;

    while (*link != NULL) {
		PPTOKEN *cur = *link;
		if (cur->tok == lex_ignore_token) {
			/* Remember the white space of the ignored token, unlink it
			   and return it to the free list */
			pending |= cur->pp_space;
			*link = cur->next;
			free_pptok (cur);
		} else {
			/* Hand any remembered white space to the first token which
			   survives */
			if (pending) {
				cur->pp_space |= pending;
				pending = 0;
			}
			link = &cur->next;
		}
    }
    return (toks);
}


/*
 *    READ A LINE OF TOKENS
 *
 *    This routine reads the sequence of preprocessing tokens comprising a
 *    preprocessing directive (for example, a macro definition).  If t1 is
 *    not lex_ignore_token then it is taken to be the first token in the
 *    definition, similarly tn gives the last token.
 */

PPTOKEN*
read_line(int t1, int tn)
{
    PPTOKEN first_tok, *last_tok = &first_tok;
    unsigned long gap = 0;
    int tok = t1;

    /* Without a supplied first token, start by reading one */
    if (tok == lex_ignore_token) {
		tok = read_token ();
		update_column ();
		if (in_preproc_dir) preproc_loc = crt_loc;
    }

    /* Append tokens until the end of the line or of the file */
    for (;;) {
		PPTOKEN *new_tok;
		if (tok == lex_newline || tok == lex_eof) break;
		new_tok = new_pptok ();
		last_tok->next = new_tok;
		last_tok = new_tok;
		new_tok->tok = tok;
		if (tok <= LAST_COMPLEX_TOKEN) token_parts (tok, new_tok);
		new_tok->pp_space = (gap & WHITE_MASK);

		/* The white space skipped here belongs to the following token */
		gap = skip_white (0);
		tok = read_token ();
		update_column ();
		if (in_preproc_dir) preproc_loc = crt_loc;
    }

    /* Append the supplied final token, if there is one */
    if (tn != lex_ignore_token) {
		PPTOKEN *end_tok = new_pptok ();
		last_tok->next = end_tok;
		last_tok = end_tok;
		end_tok->tok = tn;
		token_parts (tn, end_tok);
		end_tok->pp_space = (gap & WHITE_MASK);
    }

    /* Terminate the list; this also gives NULL when nothing was read */
    last_tok->next = NULL;
    if (in_preproc_dir) IGNORE skip_to_end ();
    return (first_tok.next);
}


/*
 *    COPY A LIST OF TOKENS
 *
 *    This routine copies the list of tokens toks, excluding any ignored
 *    tokens.  If no tokens at all were copied, a placemarker token is
 *    inserted and *have_placemarkers is set to 1.
 */

static PPTOKEN*
copy_tok_list(PPTOKEN *toks, int *have_placemarkers)
{
    /*
     * toks               list to be copied; it is not modified
     * have_placemarkers  set to 1 if a placemarker had to be created,
     *                    otherwise left untouched
     *
     * The result is a newly allocated, NULL-terminated list which is never
     * empty: it holds either the copied tokens or a single placemarker.
     */
    PPTOKEN *ptr_tok;
    PPTOKEN dummy_tok, *this_tok = &dummy_tok;

    /* Copy every token except the ignored ones */
    for (ptr_tok = toks; ptr_tok != NULL; ptr_tok = ptr_tok->next) {
		int t = ptr_tok->tok;
		if (t != lex_ignore_token) {
			this_tok->next = new_pptok ();
			this_tok = this_tok->next;
			copy_pptok (this_tok, t, ptr_tok);
		}
    }

    if (this_tok == &dummy_tok) {
		/* Nothing was copied, so stand in a placemarker */
		this_tok->next = new_pptok ();
		this_tok = this_tok->next;
		this_tok->tok = lex_placemarker;
		/* new_pptok only initialises pp_opts.  concat_pptoks reads the
		   pp_space of both of its operands, so the placemarker must not
		   carry whatever value the recycled token happened to hold. */
		this_tok->pp_space = 0;
		*have_placemarkers = 1;
    }
    this_tok->next = NULL;
    return (dummy_tok.next);
}


/*
 *    STRINGISE A LIST OF TOKENS
 *
 *    This routine turns the list of tokens toks into a string.  The result
 *    is built up in token_buff.  If esc is true then any '"' (or whatever
 *    the value of quote is) and '\' characters in string and character
 *    literals (including the initial and terminating quotes) are preceded
 *    by a '\'.  This routine is used in the implementation of the # operator,
 *    in macro #include directives and a couple of other preprocessing
 *    directives.  It returns 1 to indicate a valid string.
 */

int
quote_tok_list(PPTOKEN *toks, int esc, int quote)
{
    /* res is cleared if the finished buffer fails the legality scan below */
    int res = 1;
    string st, se;
    /* started is set once a token has been written; it suppresses a space
       before the first token */
    int started = 0;
    /* escaped is only used by the legality scan at the end */
    int escaped = 0;
    PPTOKEN *ptr_tok;
    character qo = (character) quote;
    /* The previous contents of token_buff are discarded */
    BUFFER *bf = clear_buffer (&token_buff, NULL);
	
    /* Scan through tokens */
    for (ptr_tok = toks; ptr_tok != NULL; ptr_tok = ptr_tok->next) {
		/* p is the literal prefix (0 for none), q the literal delimiter;
		   both are only set for string and character literals */
		character p, q;
		int t = ptr_tok->tok;
		if (t == lex_ignore_token) continue;
		
		/* Print initial space if necessary */
		/* Any amount of preceding white space becomes a single space */
		if (ptr_tok->pp_space && started) bfputc (bf, char_space);
		
		/* Find the token name */
		switch (t) {
	    case lex_identifier : {
			/* Identifiers */
			HASHID nm = ptr_tok->pp_data.id.hash;
			st = DEREF_string (hashid_name_etc_text (nm));
			bfputs (bf, st);
			break;
	    }
	    case lex_integer_Hlit : {
			/* Integer and floating-point literals */
			st = ptr_tok->pp_data.text;
			bfputs (bf, st);
			break;
	    }
	    case lex_char_Hlit : {
			/* Character literals */
			p = 0;
			q = char_single_quote;
			/* The other three literal cases set p and q and then jump
			   into this block */
			string_label : {
				st = ptr_tok->pp_data.str.start;
				se = ptr_tok->pp_data.str.end;
				
				/* Prefix and opening quote */
				/* The delimiter is only escaped if it is the character
				   quote, so with quote '"' a character literal keeps
				   its plain single quotes */
				if (p) bfputc (bf, (int) p);
				if (esc && q == qo) bfputc (bf, char_backslash);
				bfputc (bf, (int) q);
				
				/* Copy string */
				/* The range st..se is used rather than a terminator */
				while (st != se) {
					character c = *(st++);
					if (c == qo || c == char_backslash) {
						/* Escaped characters */
						if (esc) bfputc (bf, char_backslash);
					}
					bfputc (bf, (int) c);
				}
				
				/* Closing quote */
				if (esc && q == qo) bfputc (bf, char_backslash);
				bfputc (bf, (int) q);
			}
			break;
	    }
	    case lex_wchar_Hlit : {
			/* Wide character literals */
			p = char_L;
			q = char_single_quote;
			goto string_label;
	    }
	    case lex_string_Hlit : {
			/* String literals */
			p = 0;
			q = char_quote;
			goto string_label;
	    }
	    case lex_wstring_Hlit : {
			/* Wide string literals */
			p = char_L;
			q = char_quote;
			goto string_label;
	    }
	    case lex_unknown : {
			/* Unknown characters */
			/* Simple characters are written directly, anything else is
			   passed to print_char */
			unsigned long u;
			int ch = CHAR_SIMPLE;
			u = get_multi_char (ptr_tok->pp_data.buff, &ch);
			if (ch == CHAR_SIMPLE) {
				bfputc (bf, (int) u);
			} else {
				print_char (u, ch, 0, bf);
			}
			break;
	    }
	    case lex_macro_Harg : {
			/* Macro parameters */
			/* The parameter is written by name */
			HASHID nm = ptr_tok->pp_data.par.hash;
			st = DEREF_string (hashid_name_etc_text (nm));
			bfputs (bf, st);
			break;
	    }
	    default : {
			/* Symbols */
			st = token_name (t);
			bfputs (bf, st);
			break;
	    }
		}
		started = 1;
    }
	
    /* End of string */
    /* The terminator is written but then stepped back over, so that posn
       marks the end of the text proper */
    bfputc (bf, 0);
    bf->posn--;
	
    /* Check for legal strings */
    /* The text is rejected if it contains the character quote not preceded
       by a backslash, or if it ends with an unpaired backslash */
    st = bf->start;
    se = bf->posn;
    while (st != se) {
		if (escaped) {
			escaped = 0;
		} else {
			character c = *st;
			if (c == qo) res = 0;
			if (c == char_backslash) escaped = 1;
		}
		st++;
    }
    if (escaped) res = 0;
    return (res);
}


/*
 *    CONCATENATE TWO TOKENS
 *
 *    This routine concatenates the two tokens p and q into a single token.
 *    This is used to implement the ## operator.  If the result is a valid
 *    preprocessing token then p is overwritten by the result and 1 is
 *    returned.  Otherwise p and q are unchanged and 0 is returned.
 */

static int
concat_pptoks(PPTOKEN *p, PPTOKEN *q)
{
    int a = p->tok;
    int b = q->tok;
    unsigned long sa = p->pp_space;
    unsigned long sb = q->pp_space;
    /* The white space flags are merged into p and cleared in q before it is
       known whether the concatenation is valid, so these two fields are
       modified even when 0 is returned */
    p->pp_space = (sa | sb);
    q->pp_space = 0;
    /* A placemarker combined with any token gives that token */
    if (a == lex_placemarker) {
		copy_pptok (p, b, q);
		return (1);
    } else if (b == lex_placemarker) {
		return (1);
    }
    if (a >= FIRST_SYMBOL && a <= LAST_SYMBOL) {
		if (b >= FIRST_SYMBOL && b <= LAST_SYMBOL) {
			/* Two symbols may combine to give another symbol */
			/* The combined spelling is built directly in the storage of
			   token_buff and compared with the name of each symbol */
			/* NOTE(review): nothing here checks that token_buff has room
			   for the two names - presumably it always does; confirm */
			int c;
			string s = token_buff.start;
			ustrcpy_v (s, token_name (a));
			ustrcpy_v (s + ustrlen (s), token_name (b));
			for (c = FIRST_SYMBOL; c <= LAST_SYMBOL; c++) {
				if (ustreq (s, token_name (c))) {
					/* Token found - check options */
					p->tok = c;
					if (c >= FIRST_C_SYMBOL && c <= LAST_C_SYMBOL) {
						return (1);
					}
#if LANGUAGE_CPP
					if (c >= FIRST_CPP_SYMBOL && c <= LAST_CPP_SYMBOL) {
						return (1);
					}
#endif
					if (c >= FIRST_EXTRA_SYMBOL && c <= LAST_EXTRA_SYMBOL) {
						if (allow_extra_symbols) return (1);
					}
					if (c >= FIRST_DIGRAPH && c <= LAST_DIGRAPH) {
						if (allow_digraphs) return (1);
					}
					/* The spelling matched but the symbol was not accepted
					   by any of the checks above: restore the token value
					   and keep searching */
					p->tok = a;
				}
			}
			return (0);
			
		} else if (a == lex_dot && b == lex_integer_Hlit) {
			/* A dot may start a number */
			/* A number which itself starts with a dot is rejected */
			string s = q->pp_data.text;
			if (s [0] == char_dot) return (0);
			p->tok = lex_integer_Hlit;
			p->pp_data.text = ustring_concat (token_name (a), s);
			return (1);
			
		} else if (a == lex_backslash && b == lex_identifier) {
			/* A backslash may start a universal character */
			/* NOT YET IMPLEMENTED */
			/* EMPTY */
		}
		
    } else if (a == lex_identifier) {
		HASHID nm = p->pp_data.id.hash;
		string s = DEREF_string (hashid_name_etc_text (nm));
		if (b == lex_identifier) {
			/* Two identifiers give another identifier */
			HASHID nm2 = q->pp_data.id.hash;
			string s2 = DEREF_string (hashid_name_etc_text (nm2));
			s = ustring_concat (s, s2);
			nm = lookup_name (s, hash (s), 2, lex_identifier);
			p->pp_data.id.hash = nm;
			p->pp_data.id.use = DEREF_id (hashid_id (nm));
			return (1);
			
		} else if (b == lex_integer_Hlit) {
			/* An identifier and a number may give an identifier */
			character c;
			string n = q->pp_data.text;
			while (c = *(n++), c != 0) {
				if (c == char_dot || c == char_plus || c == char_minus) {
					/* The number must be entirely alphanumeric */
					return (0);
				}
			}
			s = ustring_concat (s, q->pp_data.text);
			nm = lookup_name (s, hash (s), 2, lex_identifier);
			p->pp_data.id.hash = nm;
			p->pp_data.id.use = DEREF_id (hashid_id (nm));
			return (1);
			
		} else if (s [0] == char_L && s [1] == 0) {
			/* An L may start a wide character or string */
			/* The literal text of q is shared with p, not copied */
			if (b == lex_char_Hlit) {
				p->tok = lex_wchar_Hlit;
				p->pp_data.str.start = q->pp_data.str.start;
				p->pp_data.str.end = q->pp_data.str.end;
				return (1);
			} else if (b == lex_string_Hlit) {
				p->tok = lex_wstring_Hlit;
				p->pp_data.str.start = q->pp_data.str.start;
				p->pp_data.str.end = q->pp_data.str.end;
				return (1);
			}
		}
		
    } else if (a == lex_integer_Hlit) {
		string s = p->pp_data.text;
		if (b == lex_identifier) {
			/* A number followed by an identifier is a number */
			HASHID nm = q->pp_data.id.hash;
			string s2 = DEREF_string (hashid_name_etc_text (nm));
			p->pp_data.text = ustring_concat (s, s2);
			return (1);
			
		} else if (b == lex_integer_Hlit) {
			/* Two numbers form another number */
			string s2 = q->pp_data.text;
			p->pp_data.text = ustring_concat (s, s2);
			return (1);
			
		} else if (b == lex_dot || b == lex_ellipsis) {
			/* A number followed by a sequence of dots is a number */
			p->pp_data.text = ustring_concat (s, token_name (b));
			return (1);
			
		} else if (b == lex_plus || b == lex_minus) {
			/* A sign may terminate a number after e or E */
			/* n is the index of the last character of the number */
			/* NOTE(review): assumes the text is not empty, otherwise the
			   subtraction wraps round - confirm */
			unsigned n = (unsigned) ustrlen (s) - 1;
			if (s [n] == char_e || s [n] == char_E) {
				p->pp_data.text = ustring_concat (s, token_name (b));
				return (1);
			}
		}
    }
    /* All remaining combinations are invalid */
    return (0);
}


/*
 *    DUMMY LOCATION FOR INPUT FILE
 *
 *    This dummy location represents tokens read directly from the input file.
 *    If present, it will always be the last element of a list of token
 *    locations.
 */

/* Token list pointer referred to by dummy_loc; it starts out empty */
static PPTOKEN *dummy_loc_toks = NULL;
/* Initialised with the address of dummy_loc_toks and NULL - presumably the
   toks and next members of TOKEN_LOC; verify against its declaration */
static TOKEN_LOC dummy_loc = { &dummy_loc_toks, NULL };
/* expand_macro compares location pointers with file_loc to decide whether
   the next token is to be read directly from the input file */
TOKEN_LOC *file_loc = &dummy_loc;


/*
 *    FORWARD DECLARATION
 *
 *    The functions expand_macro, expand_toks and expand_tok_list are defined
 *    recursively.  This gives the necessary forward declarations.
 */

/* Declared ahead of its definition because expand_macro calls it on each
   macro argument which needs normal expansion */
static PPTOKEN *expand_toks(PPTOKEN *, TOKEN_LOC *, int);


/*
 *    HANDLE OLD STYLE STRINGISING
 *
 *    This routine handles the old style stringising for the definition defn
 *    for the given macro.  Argument replacement has already been performed
 *    on defn.  If this facility is enabled then in macro definitions of the
 *    form:
 *
 *		#define f(X)	"X"
 *
 *    quotes are classified as unknown characters rather than string
 *    terminators.  This means that the X is recognised as a macro parameter
 *    and is replaced during argument replacement.  The job of this routine
 *    is to spot these unrecognised quotes and turn them into proper strings.
 */

PPTOKEN*
recognise_strings(PPTOKEN *defn, HASHID macro, int act)
{
    /* last_tok trails one token behind this_tok, except on the first
       iteration where both refer to the first token of defn */
    PPTOKEN *this_tok = defn;
    PPTOKEN *last_tok = defn;
    while (this_tok != NULL) {
		if (this_tok->tok == lex_unknown) {
			unsigned long u;
			int ch = CHAR_SIMPLE;
			/* qo defaults to a character which is not a quote, so that
			   non-simple characters are never taken for quotes */
			character qo = char_question;
			u = get_multi_char (this_tok->pp_data.buff, &ch);
			if (ch == CHAR_SIMPLE) qo = (character) u;
			if (qo == char_quote || qo == char_single_quote) {
				/* Start of string */
				int t;
				int escaped = 0;
				PPTOKEN *next_tok = this_tok->next;
				PPTOKEN *ptr_tok = next_tok;
				/* Look for the matching closing quote; an unknown
				   character following a backslash cannot close the
				   string.  ptr_tok is NULL afterwards if there is no
				   closing quote. */
				while (ptr_tok != NULL) {
					t = ptr_tok->tok;
					if (t == lex_macro_Harg) {
						/* A macro parameter between the quotes is
						   reported whatever the value of act */
						HASHID nm = ptr_tok->pp_data.par.hash;
						ERROR err = ERR_cpp_stringize_old (nm, macro);
						report (preproc_loc, err);
					}
					if (escaped) {
						escaped = 0;
					} else if (t == lex_unknown) {
						character qc = char_question;
						/* NOTE(review): ch is not reset to CHAR_SIMPLE
						   before this call - presumably get_multi_char
						   always assigns it; confirm */
						u = get_multi_char (ptr_tok->pp_data.buff, &ch);
						if (ch == CHAR_SIMPLE) qc = (character) u;
						if (qc == qo) break;
						if (qc == char_backslash) escaped = 1;
					}
					ptr_tok = ptr_tok->next;
				}
				/* If act is false the scan above only reports errors and
				   the list is left unchanged */
				if (act) {
					/* Detach the tokens following the opening quote, up
					   to and including the closing quote, as a separate
					   list headed by next_tok */
					if (ptr_tok == NULL) {
						/* No closing quote */
						report (crt_loc, ERR_cpp_stringize_bad (macro));
						this_tok->next = NULL;
					} else {
						/* The closing quote is marked as ignored so that
						   quote_tok_list skips it */
						ptr_tok->tok = lex_ignore_token;
						this_tok->next = ptr_tok->next;
						ptr_tok->next = NULL;
					}
					
					/* Form the string */
					/* quote_tok_list leaves the text in token_buff, from
					   where token_parts copies it into the opening quote
					   token, which thereby becomes the literal */
					if (!quote_tok_list (next_tok, 0, (int) qo)) {
						report (crt_loc, ERR_cpp_stringize_bad (macro));
					}
					t = (qo == char_quote ? lex_string_Hlit : lex_char_Hlit);
					this_tok->tok = t;
					token_parts (t, this_tok);
					free_tok_list (next_tok);
					
					/* Check for wide strings */
					/* A preceding identifier spelt L is merged with the
					   literal.  When the quote is the first token of
					   defn, last_tok is this_tok, which is a literal by
					   now, so the test fails. */
					/* NOTE(review): white space between the L and the
					   quote is not examined here - confirm intended */
					if (last_tok->tok == lex_identifier) {
						string s;
						HASHID nm = last_tok->pp_data.id.hash;
						s = DEREF_string (hashid_name_etc_text (nm));
						if (s [0] == char_L && s [1] == 0) {
							if (t == lex_string_Hlit) {
								t = lex_wstring_Hlit;
							} else {
								t = lex_wchar_Hlit;
							}
							/* The L token takes over the literal data and
							   the literal token itself is freed */
							copy_pptok (last_tok, t, this_tok);
							last_tok->next = this_tok->next;
							free_pptok (this_tok);
							this_tok = last_tok;
						}
					}
				}
			}
		}
		last_tok = this_tok;
		this_tok = this_tok->next;
    }
    /* The first token is only ever modified in place, so the head of the
       list is unchanged */
    return (defn);
}


/*
 *    HANDLE TOKEN CONCATENATION
 *
 *    This routine handles any ## operators in the definition defn of the
 *    given macro.  Note that any initial or terminal ## operators have
 *    already been reported.
 */

static PPTOKEN*
process_concat(PPTOKEN *defn, HASHID macro)
{
    PPTOKEN *lhs;

    /* Discard any ## operators at the very start of the definition */
    while (defn != NULL && defn->tok == lex_hash_Hhash_Hop) {
		PPTOKEN *op = defn;
		defn = op->next;
		free_pptok (op);
    }

    /* lhs is the token which is the left operand of a possible ## */
    lhs = defn;
    while (lhs != NULL && lhs->next != NULL) {
		PPTOKEN *op = lhs->next;
		if (op->tok != lex_hash_Hhash_Hop) {
			/* Not an operator, move along the list */
			lhs = op;
			continue;
		}

		/* Unlink and free the ## itself */
		lhs->next = op->next;
		free_pptok (op);

		/* A ## at the very end has no right operand */
		if (lhs->next == NULL) break;

		/* Join the two operands.  On success the right operand is freed
		   and lhs is examined again, so that a ## b ## c is evaluated
		   from left to right.  On failure both tokens are kept. */
		if (!concat_pptoks (lhs, lhs->next)) {
			report (crt_loc, ERR_cpp_concat_bad (macro));
		} else {
			PPTOKEN *rhs = lhs->next;
			lhs->next = rhs->next;
			free_pptok (rhs);
		}
    }
    return (defn);
}


/*
 *   REMOVE PLACEMARKER TOKENS
 *
 *   This function removes all placemarker tokens from a definition once
 *   any ## operators have been processed.
 */

static PPTOKEN *
remove_placemarkers(PPTOKEN *defn)
{
	/* link addresses the pointer leading to the token being examined, so
	   that no dummy head token is required */
	PPTOKEN **link = &defn;
	while (*link != NULL) {
		PPTOKEN *cur = *link;
		if (cur->tok == lex_placemarker) {
			/* Unlink the placemarker and return it to the free list */
			*link = cur->next;
			free_pptok (cur);
		} else {
			link = &cur->next;
		}
	}
	return (defn);
}


/*
 *    MAXIMUM NUMBER OF MACRO PARAMETERS
 *
 *    This macro defines the maximum number of macro parameters which
 *    expand_macro can handle without having to allocate temporary space
 *    to hold them.  With allocation the number of parameters is unlimited.
 */

/* expand_macro declares its local argument array with MAX_MACRO_PARAMS + 1
   elements and stores the arguments it reads starting from index 1; macros
   with more parameters than this use an array from xmalloc_nof instead */
#define MAX_MACRO_PARAMS	256


/*
 *    EXPAND A MACRO DEFINITION
 *
 *    This routine expands the macro given by the hash table entry macro.
 *    The argument locs gives a list of locations where macro arguments can
 *    be read from.  locs will never be NULL.  The argument complete is true
 *    to indicate that this is a complete macro expansion, and that any
 *    argument errors should be reported.  If locs contains file_loc then
 *    complete will always be true.  When reading from file_loc we always
 *    set in_preproc_dir to 2 to make read_token return lex_eof at the end
 *    of each file, rather than automatically reverting to the including
 *    file, and to cause it to ignore any preprocessing directives.
 *
 *    Note that the entry for the macro in the hash table is marked during
 *    expansion to prevent recursive expansions.  Several points concerning
 *    macro expansion are undefined; in this implementation:
 *
 *	1.  Firstly, # operators are evaluated from left to right;
 *	2.  Secondly, ## operators are evaluated from left to right;
 *	3.  If a ## b is not a valid preprocessing token then it is
 *	    resolved to a b;
 *	4.  A # operator in a function-like macro which is not followed
 *	    by a macro argument is ignored (it is left as # in object-like
 *	    macros of course);
 *	5.  A ## operator at the start or end of a macro is ignored;
 *	6.  Any preprocessing directives in the macro arguments are treated
 *	    as normal sequences of preprocessing tokens.
 *
 *    A further undefined area concerns the ban on recursive macro expansions.
 *    This is extended from the macro definition itself to any extra tokens
 *    which are read during the expansion of the macro definition.  For
 *    example, in:
 *
 *		    #define f(a)	a * g
 *		    #define g(a)	f (a)
 *		    f (2) (9)
 *
 *    the result is '2 * f (9)', rather than '2 * 9 * g'.
 */

PPTOKEN*
expand_macro(HASHID macro, TOKEN_LOC *locs, int complete)
{
    LOCATION loc;
    int state = 0;
    PPTOKEN *defn;
    unsigned long sp = 0;
    unsigned no_pars = 0;
    int va_macro = 0;
    int have_unknown = 0;
    int have_hash_hash = 0;
    int have_placemarkers = 0;
    unsigned long ws = crt_spaces;
    PPTOKEN dummy_tok, *this_tok = &dummy_tok;
    PPTOKEN *arg_array_base [ MAX_MACRO_PARAMS + 1 ];
    PPTOKEN **arg_array = arg_array_base;
	
    /* Get the macro identifier */
    IDENTIFIER id = DEREF_id (hashid_id (macro));
    unsigned tag = TAG_id (id);
    DECL_SPEC ds = DEREF_dspec (id_storage (id));
	
    /* Mark the macro as being used */
    loc = crt_loc;
    ds |= dspec_used;
    COPY_dspec (id_storage (id), ds);
    if (do_macro && do_usage) dump_use (id, &crt_loc, 1);
	
    /* Get macro definition and other data */
    if (tag == id_obj_macro_tag) {
		/* Object-like macros */
		defn = DEREF_pptok (id_obj_macro_defn (id));
		if (defn == NULL) return (NULL);
		
		if (ds & dspec_builtin) {
			/* Check built-in macros */
			int t = defn->tok;
			if (t == lex_builtin_Hline) {
				/* Construct an integer literal for __LINE__ */
				BUFFER *bf = clear_buffer (&token_buff, NULL);
				bfprintf (bf, "%lu", loc.line);
				bfputc (bf, 0);
				this_tok = new_pptok ();
				this_tok->tok = lex_integer_Hlit;
				this_tok->next = NULL;
				this_tok->pp_opts = NULL;
				this_tok->pp_space = 0;
				token_parts (lex_integer_Hlit, this_tok);
				return (this_tok);
			}
			
			if (t == lex_builtin_Hfile) {
				/* Construct a string literal for __FILE__ */
				character c;
				string fn = DEREF_string (posn_file (crt_loc.posn));
				BUFFER *bf = clear_buffer (&token_buff, NULL);
				if (DEREF_int (posn_quote_file (crt_loc.posn))) {
					while (c = *(fn++), c != 0) {
						if (c == char_quote || c == char_backslash) {
							/* Escape quotes and backslashes */
							bfputc (bf, char_backslash);
						}
						bfputc (bf, (int) c);
					}
				} else {
					bfputs (bf, fn);
				}
				this_tok = new_pptok ();
				this_tok->tok = lex_string_Hlit;
				this_tok->next = NULL;
				this_tok->pp_opts = NULL;
				this_tok->pp_space = 0;
				token_parts (lex_string_Hlit, this_tok);
				return (this_tok);
			}
		}
		
    } else {
		/* Function-like macros */
		int t;
		unsigned n;
		TOKEN_LOC *lc;
		int brackets = 0;
		unsigned no_args = 0;
		PPTOKEN *ptr_tok = NULL;
		TOKEN_LOC *ptr_loc = locs;
		
		/* Check for following open bracket */
		for (;;) {
			if (ptr_loc == file_loc) {
				/* Read token from input location */
				int legal = 1;
				sp = skip_white (1);
				if (peek_char (char_open_round, &legal)) {
					/* Next token in file is '(' */
					update_column ();
					t = lex_open_Hround;
				} else {
					/* Other cases */
					t = lex_unknown;
					if (sp) patch_white (sp);
				}
				break;
			} else if (ptr_loc == NULL) {
				/* No more locations */
				t = lex_eof;
				break;
			} else {
				/* Read token from current location */
				ptr_tok = (*(ptr_loc->toks))->next;
				while (ptr_tok && ptr_tok->tok == lex_ignore_token) {
					/* Step over any ignored tokens */
					ptr_tok = ptr_tok->next;
				}
				if (ptr_tok != NULL) {
					/* Return the next token */
					t = ptr_tok->tok;
					ptr_tok = ptr_tok->next;
					break;
				}
				/* Move on to next location */
				ptr_loc = ptr_loc->next;
			}
		}
		
		/* Next token is not an open bracket */
		if (t != lex_open_Hround) {
			if (complete) {
				report (loc, ERR_cpp_replace_arg_none (macro));
			}
			incomplete_macro : {
				/* Return macro identifier */
				this_tok = new_pptok ();
				this_tok->tok = lex_identifier;
				this_tok->next = NULL;
				this_tok->pp_space = 0;
				this_tok->pp_data.id.hash = macro;
				this_tok->pp_data.id.use = id;
				return (this_tok);
			}
		}
		
		/* Check argument array size */
		no_pars = DEREF_unsigned (id_func_macro_no_params (id));
		if (no_pars > MAX_MACRO_PARAMS) {
			arg_array = xmalloc_nof (PPTOKEN *, no_pars + 1);
		}
		va_macro = DEREF_int (id_func_macro_va_macro (id));
		
		/* Scan macro arguments */
		for (;;) {
			/* Get the next token */
			int refill = 0;
			for (;;) {
				if (ptr_loc == file_loc) {
					/* Read token from file location */
					sp = skip_white (1);
					in_preproc_dir = 2;
					t = read_token ();
					update_column ();
					if (t == lex_hash_H1 || t == lex_hash_H2) {
						if (sp & WHITE_NEWLINE) {
							/* Looks like preprocessing directive */
							ERROR err = ERR_cpp_replace_arg_ppdir (macro);
							report (crt_loc, err);
						}
					}
					break;
				} else if (ptr_loc == NULL) {
					/* No more locations to read token from */
					t = lex_eof;
					break;
				} else {
					/* Read token from next location */
					if (refill) ptr_tok = (*(ptr_loc->toks))->next;
					if (ptr_tok != NULL) {
						t = ptr_tok->tok;
						break;
					}
					ptr_loc = ptr_loc->next;
					refill = 1;
				}
			}
			
			/* Examine this token */
			if (t == lex_open_Hround) {
				brackets++;
			} else if (t == lex_close_Hround) {
				/* Close brackets mark the end of the argument list */
				if (brackets == 0) break;
				brackets--;
			} else if (t == lex_comma && (!va_macro || no_args < no_pars - 1)) {
				/* Commas mark the end of an argument */
				if (brackets == 0) {
					this_tok->next = NULL;
					no_args++;
					if (dummy_tok.next) {
						dummy_tok.next->pp_space = 0;
					} else if (complete) {
						ERROR err;
						err = ERR_cpp_replace_arg_empty (no_args, macro);
						report (crt_loc, err);
						/* Insert a placemarker. */
						dummy_tok.next = new_pptok ();
						dummy_tok.next->tok = lex_placemarker;
						dummy_tok.next->next = NULL;
						have_placemarkers = 1;
					}
					if (no_args <= no_pars) {
						arg_array [ no_args ] = dummy_tok.next;
					} else {
						free_tok_list (dummy_tok.next);
					}
					if (ptr_tok) ptr_tok = ptr_tok->next;
					this_tok = &dummy_tok;
					continue;
				}
			} else if (t == lex_eof) {
				break;
			}
			
			/* Build up current argument */
			this_tok->next = new_pptok ();
			this_tok = this_tok->next;
			if (ptr_tok) {
				copy_pptok (this_tok, t, ptr_tok);
				ptr_tok = ptr_tok->next;
			} else {
				this_tok->tok = t;
				if (t <= LAST_COMPLEX_TOKEN) token_parts (t, this_tok);
				this_tok->pp_space = (sp & WHITE_MASK);
			}
		}
		
		/* Create last argument */
		in_preproc_dir = 0;
		this_tok->next = NULL;
		if (no_args || dummy_tok.next) {
			no_args++;
			if (dummy_tok.next) {
				dummy_tok.next->pp_space = 0;
			} else if (complete) {
				ERROR err = ERR_cpp_replace_arg_empty (no_args, macro);
				report (crt_loc, err);
			}
			if (no_args <= no_pars) {
				arg_array [ no_args ] = dummy_tok.next;
			} else {
				free_tok_list (dummy_tok.next);
			}
		}
		if (sp) patch_white (sp);
		this_tok = &dummy_tok;
		
		/* Check for incomplete argument lists */
		if (t == lex_eof) {
			if (complete) {
				/* Report error, but carry on */
				report (loc, ERR_cpp_replace_arg_eof (macro));
			} else {
				/* Free those arguments actually read */
				for (n = 1; n <= no_args && n <= no_pars; n++) {
					free_tok_list (arg_array [n]);
				}
				if (arg_array != arg_array_base) xfree (arg_array);
				goto incomplete_macro;
			}
		}
		
		/* Update location pointers */
		if (ptr_loc) *(ptr_loc)->toks = ptr_tok;
		for (lc = locs; lc != ptr_loc; lc = lc->next) {
			*(lc)->toks = NULL;
		}
		
		/* According to C99, an invocation 'MACRO()' should be
		 * interpreted as having a single empty argument if MACRO
		 * was defined with one parameter. */
		if (no_pars == 1 && no_args == 0) {
			report (crt_loc, ERR_cpp_replace_arg_empty (0, macro));
			arg_array [1] = NULL;
			no_args = 1;
			have_placemarkers = 1;
		}

		/* Check that argument and parameter lists match */
		if (no_pars != no_args && (!va_macro || no_pars > no_args)) {
			ERROR err;
			n = no_args;
			if (!va_macro) {
				err = ERR_cpp_replace_arg_number (macro, n, n, no_pars);
			} else {
				err = ERR_cpp_replace_arg_number2 (macro, n, n, no_pars);
			}
			report (crt_loc, err);
			
			/* Add extra arguments if there are not enough */
			for (n = no_args + 1; n <= no_pars; n++) {
				arg_array [n] = NULL;
			}
		}
		IGNORE check_value (OPT_VAL_macro_args, (ulong) no_args);
		
		/* Get the macro definition */
		defn = DEREF_pptok (id_func_macro_defn (id));
    }
    crt_spaces = ws;
	
    /* Copy the definition, expanding macro arguments */
    while (defn != NULL) {
		int t = defn->tok;
		
		if (t == lex_macro_Harg) {
			/* Macro argument - identified by argument number */
			unsigned long n = defn->pp_data.par.no;
			PPTOKEN *arg = arg_array [n];
			
			if (state == 0) {
				if (defn->next && defn->next->tok == lex_hash_Hhash_Hop) {
					/* Preceding ##, just copy argument */
					this_tok->next = copy_tok_list (arg, &have_placemarkers);
				} else {
					/* Normal argument expansion */
					TOKEN_LOC *arg_locs = NULL;
					this_tok->next = expand_toks (arg, arg_locs, 0);
				}
				
			} else if (state == 1) {
				/* Following #, fake reading a string literal */
				this_tok->next = new_pptok ();
				if (!quote_tok_list (arg, 1, char_quote)) {
					report (crt_loc, ERR_cpp_stringize_bad (macro));
				}
				this_tok->next->tok = lex_string_Hlit;
				token_parts (lex_string_Hlit, this_tok->next);
				this_tok->next->next = NULL;
				this_tok->next->pp_space = 0;
				
			} else {
				/* Following ##, just copy argument */
				this_tok->next = copy_tok_list (arg, &have_placemarkers);
			}
			
			sp = defn->pp_space;
			if (sp && this_tok->next) {
				this_tok->next->pp_space = sp;
				sp = 0;
			}
			while (this_tok->next) this_tok = this_tok->next;
			state = 0;
			
		} else if (t == lex_hash_Hop) {
			/* Check for # operator */
			state = 1;
			
		} else if (t != lex_ignore_token) {
			/* Copy other tokens */
			this_tok->next = new_pptok ();
			this_tok = this_tok->next;
			copy_pptok (this_tok, t, defn);
			if (sp) {
				this_tok->pp_space = sp;
				sp = 0;
			}
			if (t == lex_hash_Hhash_Hop) {
				/* Check for ## operator */
				have_hash_hash = 1;
				state = 2;
			} else {
				if (t == lex_unknown) have_unknown = 1;
				state = 0;
			}
		}
		defn = defn->next;
    }
    this_tok->next = NULL;
    defn = dummy_tok.next;
	
    /* Allow for argument expansion in strings */
    if (have_unknown) defn = recognise_strings (defn, macro, 1);
	
    /* Rescan for ## directives */
    if (have_hash_hash) defn = process_concat (defn, macro);
	
    if (have_placemarkers) defn = remove_placemarkers (defn);

    /* Rescan for further expansion (but not expanding macro) */
    COPY_dspec (id_storage (id), (ds | dspec_temp));
    this_tok = expand_toks (defn, locs, complete);
    free_tok_list (defn);
    defn = this_tok;
    COPY_dspec (id_storage (id), ds);
	
    /* Clean up after macro expansion */
    if (tag == id_func_macro_tag) {
		/* Free the macro arguments */
		unsigned n;
		for (n = 1; n <= no_pars; n++) free_tok_list (arg_array [n]);
		if (arg_array != arg_array_base) xfree (arg_array);
    }
	
    /* Return the result */
    return (defn);
}


/*
 *    EXPAND A LIST OF TOKENS
 *
 *    This is the main macro expansion routine.  It builds and returns a new
 *    list of tokens from toks.  Tokens of type lex_ignore_token are dropped,
 *    their white space flags being merged into the next token copied.  Each
 *    identifier naming an object-like or function-like macro is passed to
 *    expand_macro, unless the token has been marked as non-expanding (its
 *    use field is null) or the macro is marked with dspec_temp, in which
 *    case a recursive rescan is reported and the copied token is marked as
 *    non-expanding.  expand_macro is given a location referring to this
 *    routine's read pointer, chained in front of locs, so it may move the
 *    read pointer on or set it to NULL; in the latter case copying stops.
 *    The complete argument is passed unchanged to expand_macro.
 */

static PPTOKEN*
expand_toks(PPTOKEN *toks, TOKEN_LOC *locs, int complete)
{
    PPTOKEN head;
    PPTOKEN *tail = &head;
    PPTOKEN *src = toks;
    unsigned long pending_space = 0;

    while (src != NULL) {
		int kind = src->tok;

		/* Ignored tokens only contribute their white space */
		if (kind == lex_ignore_token) {
			pending_space |= src->pp_space;
			src = src->next;
			continue;
		}

		/* Append a copy of the current token to the output */
		tail->next = new_pptok ();
		tail = tail->next;
		copy_pptok (tail, kind, src);
		if (pending_space) {
			tail->pp_space |= pending_space;
			pending_space = 0;
		}

		if (kind == lex_identifier) {
			HASHID name = src->pp_data.id.hash;
			IDENTIFIER id = DEREF_id (hashid_id (name));
			unsigned tag = TAG_id (id);

			/* Only macros which may still expand need any work */
			if (tag == id_obj_macro_tag || tag == id_func_macro_tag) {
				if (!(IS_NULL_id (tail->pp_data.id.use))) {
					DECL_SPEC ds = DEREF_dspec (id_storage (id));
					if (ds & dspec_temp) {
						/* Recursive use - mark the copy as non-expanding */
						ERROR err = ERR_cpp_rescan_recursive (name);
						report (crt_loc, err);
						tail->pp_data.id.use = NULL_id;
					} else {
						/* The copied identifier is turned into an ignored
						 * token and the expansion is linked in after it.
						 * expand_macro may update src through here. */
						TOKEN_LOC here;
						here.toks = &src;
						here.next = locs;
						tail->tok = lex_ignore_token;
						tail->next = expand_macro (name, &here, complete);
						while (tail->next) tail = tail->next;
					}
				}
			}

			/* The input may have been used up during the expansion */
			if (src == NULL) break;
		}
		src = src->next;
    }
    tail->next = NULL;
    return (head.next);
}


/*
 *    EXPAND A SIMPLE LIST OF TOKENS
 *
 *    This routine is the simplest use of expand_toks: toks is taken to be
 *    a complete list, so no locations for reading further tokens are given
 *    and the complete flag is set.  The expanded list is returned.
 */

PPTOKEN*
expand_tok_list(PPTOKEN *toks)
{
    PPTOKEN *expanded = expand_toks (toks, NULL, 1);
    return (expanded);
}


/*
 *    ASSERTION NAMESPACE
 *
 *    The assertions occupy a namespace distinct from all other namespaces,
 *    including the macro namespace.  This variable holds that namespace; it
 *    is assigned in init_macros from make_global_nspace ("<assert>", 20).
 */

NAMESPACE assert_namespace;


/*
 *    CREATE A BUILT-IN MACRO
 *
 *    This routine creates a built-in macro named nm defined by a single
 *    preprocessing token with token type t and associated data d.
 */

static void
builtin_macro(const char *nm, int t, const char *d)
{
    if (d) {
		IDENTIFIER id;
		string s = ustrlit (nm);
		unsigned long h = hash (s);
		HASHID macro = lookup_name (s, h, 0, lex_identifier);
		IDENTIFIER pid = DEREF_id (hashid_id (macro));
		DECL_SPEC ds = (dspec_defn | dspec_builtin);
		
		/* Set up the token definition */
		PPTOKEN *p = new_pptok ();
		p->tok = t;
		p->pp_space = 0;
		p->pp_opts = NULL;
		p->next = NULL;
		if (t == lex_integer_Hlit) {
			/* Set up associated integer data */
			string c = ustrlit (string_copy (d));
			p->pp_data.text = c;
		} else if (t == lex_string_Hlit) {
			/* Set up associated string data */
			string c = ustrlit (string_copy (d));
			p->pp_data.str.start = c;
			p->pp_data.str.end = c + ustrlen (c);
		} else if (t == lex_builtin_Hline || t == lex_builtin_Hfile) {
			/* Set up associated location data */
			p->pp_space = crt_loc.column;
			p->pp_data.loc.line = crt_loc.line;
			p->pp_data.loc.posn = crt_loc.posn;
		}
		
		/* Define the macro */
		MAKE_id_obj_macro (macro, ds, NULL_nspace, crt_loc, p, id);
		COPY_id (id_alias (id), pid);
		COPY_id (hashid_id (macro), id);
		if (do_macro) dump_declare (id, &crt_loc, 1);
    }
    return;
}


/*
 *    INITIALISE BUILT-IN MACROS
 *
 *    This routine sets up the assertion namespace and, depending on its
 *    arguments, the built-in macros and assertions.  If m is nonzero the
 *    built-in macros are defined; if a is nonzero the built-in assertions
 *    are made.  The date and time strings are found whether or not m is
 *    set, and the assertion namespace is always created.
 */

void
init_macros(int m, int a)
{
    const char *date_text = find_date ("%s %2d %d");
    const char *time_text = find_time ("%.2d:%.2d:%.2d");
#if LANGUAGE_CPP
    const char *tcpplus_val = "1";
#else
    const char *tcpplus_val = "0";
#endif

    if (m) {
		/* Location, date and time macros */
		builtin_macro ("__LINE__", lex_builtin_Hline, "1");
		builtin_macro ("__FILE__", lex_builtin_Hfile, "<unknown>");
		builtin_macro ("__DATE__", lex_string_Hlit, date_text);
		builtin_macro ("__TIME__", lex_string_Hlit, time_text);

		/* Language version macros */
		builtin_macro ("__STDC__", lex_integer_Hlit, C_VERSION);
		builtin_macro ("__STDC_VERSION__", lex_integer_Hlit, ISOC_VERSION);
#if LANGUAGE_CPP
		builtin_macro ("__cplusplus", lex_integer_Hlit, CPP_VERSION);
#endif
		builtin_macro ("__tcpplus", lex_integer_Hlit, tcpplus_val);
    }

    /* The assertion namespace is needed whether or not a is set */
    assert_namespace = make_global_nspace ("<assert>", 20);

    if (a) {
		/* Built-in assertions */
		IGNORE make_assert (KEYWORD (lex_include), lex_include);
		IGNORE make_assert (KEYWORD (lex_keyword), lex_keyword);
		IGNORE make_assert (KEYWORD (lex_option), lex_option);
    }
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */