/* * Copyright (c) 2002, The Tendra Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * * Crown Copyright (c) 1997 * * This TenDRA(r) Computer Program is subject to Copyright * owned by the United Kingdom Secretary of State for Defence * acting through the Defence Evaluation and Research Agency * (DERA). 
It is made available to Recipients with a * royalty-free licence for its use, reproduction, transfer * to other parties and amendment for any purpose not excluding * product development provided that any such use et cetera * shall be deemed to be acceptance of the following conditions:- * * (1) Its Recipients shall ensure that this Notice is * reproduced upon any copies or amended versions of it; * * (2) Any amended version of it shall be clearly marked to * show both the nature of and the organisation responsible * for the relevant amendment or amendments; * * (3) Its onward transfer from a recipient to another * party shall be deemed to be that party's acceptance of * these conditions; * * (4) DERA gives no warranty or assurance as to its * quality or suitability for any purpose and DERA accepts * no liability whatsoever in relation to any use to which * it may be put. * * $TenDRA: tendra/src/tools/tspec/lex.c,v 1.12 2004/08/08 08:50:20 stefanf Exp $ */ #include "config.h" #include "cstring.h" #include "msgcat.h" #include "object.h" #include "hash.h" #include "lex.h" #include "name.h" #include "syntax.h" #include "type.h" #include "utility.h" /* * CREATE A KEYWORD * * This routine creates a keyword nm with lexical token value t. */ static void make_keyword(char *nm, int t) { object *p = make_object (nm, OBJ_KEYWORD); p->u.u_num = t; IGNORE add_hash (keywords, p, no_version); return; } /* * INITIALISE KEYWORDS * * This routine initialises the hash table of keywords. */ void init_keywords(void) { #define MAKE_KEYWORD(NAME, LEX)\ make_keyword (NAME, LEX) #include "keyword.h" return; } /* * CURRENT LEXICAL TOKEN * * These variables are used to store the value of the current lexical * token. */ int crt_lex_token = lex_unknown; int saved_lex_token = lex_unknown; char *token_value = null; /* * INPUT FILE * * These variable input_file gives the file from which the input is read. * The input_pending variable is used to unread one character. 
*/ FILE *input_file; int input_pending = LEX_EOF; /* * READ A CHARACTER FROM THE INPUT FILE * * This routine reads the next character from the input file. */ static int read_char(void) { int c = input_pending; if (c == LEX_EOF) { c = fgetc (input_file); if (c == '\n') line_no++; if (c == EOF) return (LEX_EOF); c &= 0xff; } else { input_pending = LEX_EOF; } return (c); } /* * MAPPINGS OF LEXICAL ANALYSER ROUTINES * * These macros give the mappings from the lexical analyser to the * routines defined in this module. */ static int read_identifier(int, int, int); static int read_number(int, int); static int read_string(int); static int read_insert(int); static int read_c_comment(int); static int read_comment(int); #define unread_char(A) input_pending = (A) #define get_global(A) read_identifier (0, (A), 0) #define get_local(A, B) read_identifier ((A), (B), 0) #define get_command(A, B) read_identifier ((A), (B), 0) #define get_variable(A, B) read_identifier ((A), (B), 0) #define get_number(A) read_number ((A), 0) #define get_string(A) read_string (0) #define get_comment(A) read_comment (0) #define get_c_comment(A, B) read_c_comment (0) #define get_text(A, B) read_insert (0) #define unknown_token(A) lex_unknown /* * INCLUDE THE LEXICAL ANALYSER * * The automatically generated lexical analyser is included at this * point. It defines the routine read_token which reads the next * lexical token from the input file. */ #include "lexer.h" /* * READ AN IDENTIFIER NAME * * This routine reads an identifier name from the input file. It is * entered after the first character, b, has been read. a gives the * identifier prefix, '+' for commands, '$' for variables, '~' for * local identifiers, and 0 for normal identifiers. 
*/ static int read_identifier(int a, int b, int pp) { int c; object *p; int i = 0; char *s = buffer; if (a) s [i++] = (char) a; s [i++] = (char) b; for (;;) { c = read_char (); if (!is_alphanum (lookup_char (c))) break; s [i] = (char) c; if (++i >= buffsize) { MSG_identifier_too_long (); i = 1; } } unread_char (c); s [i] = 0; p = search_hash (keywords, s, no_version); if (p) return (p->u.u_num); token_value = s; if (a == 0) { if (!pp) token_value = string_copy (s); return (lex_name); } if (a == '$') { if (!pp) token_value = string_copy (s); return (lex_variable); } if (a == '+') { /* Commands */ if (!pp) token_value = string_copy (s); MSG_unknown_command (s); return (lex_name); } token_value = string_concat (HIDDEN_NAME, s + 1); return (lex_name); } /* * READ A NUMBER * * This routine reads a number from the input file. It is entered after * the initial character, a, has been read. */ static int read_number(int a, int pp) { int c; int i = 0; char *s = buffer; s [i++] = (char) a; for (;;) { c = read_char (); if (!is_digit (lookup_char (c))) break; s [i] = (char) c; if (++i >= buffsize) { MSG_number_too_long (); i = 0; } } unread_char (c); s [i] = 0; if (pp) { token_value = s; } else { token_value = string_copy (s); } return (lex_number); } /* * READ A STRING * * This routine reads a string from the input file. It is entered after * the initial quote has been read. 
*/

/*
 * pp is nonzero when the routine is called from read_pptoken: escape
 * sequences are then kept verbatim (backslash included) and token_value
 * points at the shared buffer. Otherwise the escapes \n, \r and \t are
 * translated, any other escaped character stands for itself, and
 * token_value is a copy of the text.
 *
 * A newline or end of file inside the string is reported and ends the
 * string early; the text read so far is still delivered in token_value,
 * so that callers never see a value left over from an earlier token.
 * lex_string is always returned.
 */
static int
read_string(int pp)
{
    int c;
    int i = 0;
    char *s = buffer;

    for (;;) {
        c = read_char ();
        if (c == '"') {
            /* End of string */
            break;
        }
        if (c == '\n' || c == LEX_EOF) {
            /* Unterminated string */
            MSG_new_line_in_string ();
            break;
        }
        if (c == '\\') {
            /* Deal with escaped characters */
            c = read_char ();
            if (c == '\n' || c == LEX_EOF) {
                MSG_new_line_in_string ();
                break;
            }
            if (pp) {
                /* Preserve escapes when preprocessing */
                s [i] = '\\';
                if (++i >= buffsize) {
                    /* Keep the store below within the buffer */
                    MSG_string_too_long ();
                    i = 0;
                }
            } else {
                /* Examine escape sequence */
                switch (c) {
                case 'n' : c = '\n'; break;
                case 'r' : c = '\r'; break;
                case 't' : c = '\t'; break;
                default : break;
                }
            }
        }
        s [i] = (char) c;
        if (++i >= buffsize) {
            MSG_string_too_long ();
            i = 0;
        }
    }

    s [i] = 0;
    if (pp) {
        token_value = s;
    } else {
        token_value = string_copy (s);
    }
    return (lex_string);
}

/*
 * READ A SECTION OF QUOTED TEXT
 *
 * This routine reads a section of quoted text (indicated by enclosure
 * in a number of percent signs) into the buffer. On entry two percents
 * have already been read. Firstly any further percents are read, then
 * the text is read until an equal number of percents are encountered.
 * Any leading or trailing whitespace is ignored if pp is false.
*/ static int read_insert(int pp) { int c; int i = 0; int p = 0; int percents = 2; char *s = buffer; while (c = read_char (), c == '%') percents++; unread_char (c); if (pp) { /* Preserve percents when preprocessing */ if (percents < buffsize) { for (i = 0; i < percents; i++) s [i] = '%'; } else { MSG_insert_too_long (); } } do { c = read_char (); if (c == '%') { p++; } else { if (c == LEX_EOF) { MSG_end_of_file_in_quoted_text (); return (lex_eof); } p = 0; } s [i] = (char) c; if (++i >= buffsize) { MSG_insert_too_long (); i = 0; } } while (p != percents); if (pp) { /* Preserve percents when preprocessing */ s [i] = 0; token_value = s; } else { /* Strip out initial and final white space */ if (i >= p) i -= p; s [i] = 0; while (--i >= 0) { int a = (int) s [i]; int t = lookup_char (a & 0xff); if (!is_white (t)) break; s [i] = 0; } i = 0; for (;;) { int a = (int) s [i]; int t = lookup_char (a & 0xff); if (!is_white (t)) break; i++; } token_value = string_copy (s + i); } return (percents % 2 ? lex_build_Hinsert : lex_insert); } /* * READ A C COMMENT * * This routine reads a C-style comment into the buffer. The routine is * entered just after the initial / * has been read, and continues until * the corresponding * /. */ static int read_c_comment(int pp) { int c; int i = 2; int p = 0; char *s = buffer; s [0] = '/'; s [1] = '*'; do { c = read_char (); if (c == '*' && p == 0) { p = 1; } else if (c == '/' && p == 1) { p = 2; } else { p = 0; } if (c == LEX_EOF) { MSG_end_of_file_in_comment (); return (lex_eof); } s [i] = (char) c; if (++i >= buffsize) { MSG_comment_too_long (); i = 2; } } while (p != 2); s [i] = 0; if (pp) { token_value = s; } else { token_value = string_copy (s); } return (lex_comment); } /* * READ A TSPEC COMMENT * * This routine steps over a tspec comment. It is entered after the * initial '#' has been read and skips to the end of the line. If pp * is false then the next token is returned. 
*/ static int read_comment(int pp) { int c; while (c = read_char (), c != '\n') { if (c == LEX_EOF) { MSG_end_of_file_in_comment (); return (lex_eof); } } if (pp) return (lex_unknown); return (read_token ()); } /* * READ A PREPROCESSING TOKEN * * This routine is a stripped down version of read_token which is used * in preprocessing. Initial white space is skipped if w is true. * The token read is always stored in the buffer. */ static int read_pptoken(int w) { int c; int t = lex_unknown; do { c = read_char (); } while (w && is_white (lookup_char (c))); switch (c) { case '"' : { return (read_string (1)); } case '#' : { IGNORE read_comment (1); if (w) return (read_pptoken (w)); c = '\n'; break; } case '%' : { int a = read_char (); if (a == '%') return (read_insert (1)); unread_char (a); break; } case '+' : { int a = read_char (); if (is_alpha (lookup_char (a))) { return (read_identifier (c, a, 1)); } unread_char (a); break; } case '/' : { int a = read_char (); if (a == '*') return (read_c_comment (1)); unread_char (a); break; } case ':' : { int a = read_char (); if (a == '=') { buffer [0] = (char) c; buffer [1] = (char) a; buffer [2] = 0; return (lex_assign); } unread_char (a); break; } case '(' : t = lex_open_Hround; break; case ')' : t = lex_close_Hround; break; case '{' : t = lex_open_Hbrace; break; case '}' : t = lex_close_Hbrace; break; case ';' : t = lex_semicolon; break; case ',' : t = lex_comma; break; case LEX_EOF : t = lex_eof; break; } buffer [0] = (char) c; buffer [1] = 0; return (t); } /* * READ A STRING * * This routine reads a string plus one other character from the input * file, storing the string in str and returning the other character. * b is set to true if the string is enclosed in brackets. 
*/ static int read_pp_string(char **str, int *b) { int c = read_pptoken (1); if (c == lex_open_Hround) { *b = 1; c = read_pptoken (1); } if (c != lex_string) { MSG_string_expected (); *str = "???"; return (c); } *str = string_copy (buffer); c = read_pptoken (1); if (*b) { if (c != lex_close_Hround) { MSG_close_round_expected (); } c = read_pptoken (1); } return (c); } /* * PRINT A SUBSET NAME * * This routine prints the command cmd "api", "file", "subset" to the * file output. */ static void print_subset_name(FILE *output, char *cmd, char *api, char *file, char *subset, int b) { if (b) { IGNORE fprintf (output, "%s (\"%s\")", cmd, api); } else { IGNORE fprintf (output, "%s \"%s\"", cmd, api); } if (file) IGNORE fprintf (output, ", \"%s\"", file); if (subset) { if (file == null) IGNORE fputs (", \"\"", output); IGNORE fprintf (output, ", \"%s\"", subset); } return; } /* * PRINT THE CURRENT FILE POSITION * * This routine prints file name and line number directives to the file * output. */ static void print_posn(FILE *output) { static char *last_filename = ""; if (!streq (filename, last_filename)) { IGNORE fprintf (output, "$FILE = \"%s\";\n", filename); last_filename = filename; } IGNORE fprintf (output, "$LINE = %d;\n", line_no - 1); return; } /* * PREPROCESS A SUBFILE * * This routine reads a +IMPLEMENT or +USE directive (indicated by n) * from the input file to output. 
*/ static void preproc_subfile(FILE *output, char *cmd) { int c; int txt; int b = 0; char *api = null; char *file = null; char *subset = null; c = read_pp_string (&api, &b); if (c == lex_comma) { int d = 0; c = read_pp_string (&file, &d); if (d) { MSG_illegally_bracketed_string (); d = 0; } if (c == lex_comma) { c = read_pp_string (&subset, &d); if (d) MSG_illegally_bracketed_string (); } if (*file == 0) file = null; } if (c == lex_semicolon) { txt = ';'; } else if (c == lex_open_Hround) { txt = '('; } else { MSG_semicolon_or_open_round_expected (); txt = ';'; } preproc (output, api, file, subset); print_posn (output); print_subset_name (output, cmd, api, file, subset, b); IGNORE fputc (' ', output); IGNORE fputc (txt, output); return; } /* * PREPROCESS A FILE * * This routine preprocesses the subset api:file:subset into output. */ void preproc(FILE *output, char *api, char *file, char *subset) { int c; char *s; object *p; char *sn, *nm; FILE *old_file; int old_pending; int old_line_no; char *old_filename; boolean found = 0; int brackets = 0; int end_brackets = 0; int if_depth = 0; int else_depth = 0; FILE *input = null; boolean printing = (boolean) (subset ? 0 : 1); /* Check for previous inclusion */ sn = subset_name (api, file, subset); p = search_hash (subsets, sn, no_version); if (p != null) { if (p->u.u_info == null) { MSG_recursive_inclusion (sn); } else if (p->u.u_info->implemented) { MSG_set_not_found (sn); } return; } /* Open the input file */ nm = (file ? 
file : MASTER_FILE); if (!streq (api, LOCAL_API)) { nm = string_printf ("%s/%s", api, nm); } s = input_dir; while (s) { char *t = strchr (s, ':'); if (t == null) { IGNORE sprintf (buffer, "%s/%s", s, nm); s = null; } else { IGNORE strcpy (buffer, s); IGNORE sprintf (buffer + (t - s), "/%s", nm); s = t + 1; } input = fopen (buffer, "r"); if (input) { nm = string_copy (buffer); break; } } if (input == null) { input = fopen (nm, "r"); if (input == null) { MSG_set_not_found_no_file (sn, nm); p = make_object (sn, OBJ_SUBSET); IGNORE add_hash (subsets, p, no_version); p->u.u_info = make_info (api, file, subset); p->u.u_info->implemented = 1; return; } } if (verbose > 1) { if (subset) { IGNORE printf ("Preprocessing %s [%s] ...\n", nm, subset); } else { IGNORE printf ("Preprocessing %s ...\n", nm); } } old_filename = filename; old_line_no = line_no; old_file = input_file; old_pending = input_pending; filename = nm; line_no = 1; input_file = input; input_pending = LEX_EOF; p = make_object (sn, OBJ_SUBSET); p->u.u_info = null; IGNORE add_hash (subsets, p, no_version); /* Print position identifier */ print_subset_name (output, "+SET", api, file, subset, 0); IGNORE fputs (" := {\n", output); if (printing) print_posn (output); /* Process the input */ while (c = read_pptoken (0), c != lex_eof) { switch (c) { case lex_subset : { /* Deal with subsets */ int d = 0; c = read_pp_string (&s, &d); if (d) MSG_illegally_bracketed_string (); if (c != lex_assign) { MSG_assign_expected (); } c = read_pptoken (1); if (c != lex_open_Hbrace) { MSG_open_hbrace_expected (); } brackets++; if (printing) { int b = brackets; char *cmd = "+IMPLEMENT"; preproc (output, api, file, s); print_subset_name (output, cmd, api, file, s, 0); IGNORE fputs (";\n", output); do { c = read_pptoken (0); if (c == lex_open_Hbrace) { brackets++; } else if (c == lex_close_Hbrace) { brackets--; } else if (c == lex_eof) { MSG_cant_find_end_of_subset (s); goto end_of_file; } } while (brackets >= b); c = read_pptoken (1); 
if (c != lex_semicolon) { MSG_semicolon_expected (); } print_posn (output); } else { if (streq (s, subset)) { if (found) { MSG_set_already_defined_at (sn, p->line_no); } else { found = 1; printing = 1; print_posn (output); p->line_no = line_no; end_brackets = brackets; } } } break; } case lex_implement : { /* Deal with subset uses */ if (printing) preproc_subfile (output, "+IMPLEMENT"); break; } case lex_use : { /* Deal with subset uses */ if (printing) preproc_subfile (output, "+USE"); break; } case lex_set : { /* Deal with sets */ MSG_pset_directive_in_preprocessor (); goto default_lab; } case lex_if : case lex_ifdef : case lex_ifndef : { if_depth++; else_depth = 0; goto default_lab; } case lex_else : { if (if_depth == 0) { MSG_pelse_without_pif (); } else { if (else_depth) { MSG_duplicate_pelse (); } else_depth = 1; } goto default_lab; } case lex_endif : { if (if_depth == 0) { MSG_pendif_without_pif (); } else { if_depth--; } else_depth = 0; goto default_lab; } case lex_string : { /* Deal with strings */ if (printing) { IGNORE fprintf (output, "\"%s\"", buffer); } break; } case lex_open_Hbrace : { /* Start of subset */ brackets++; goto default_lab; } case lex_close_Hbrace : { /* End of subset */ brackets--; if (brackets < 0) { MSG_unmatched_close_hbrace (); brackets = 0; } if (subset && brackets < end_brackets) { printing = 0; } goto default_lab; } default : default_lab : { /* Deal with simple tokens */ if (printing) IGNORE fputs (buffer, output); break; } } } /* End of file */ end_of_file : { if (brackets) { MSG_bracket_imbalance_of (brackets); } while (if_depth) { MSG_pif_without_pendif (); if_depth--; } IGNORE fputs ("};\n", output); IGNORE fclose (input); p->u.u_info = make_info (api, file, subset); filename = old_filename; line_no = old_line_no; input_file = old_file; input_pending = old_pending; if (subset && !found) { MSG_set_not_found_no_subset (sn, subset); p->u.u_info->implemented = 1; } return; } }