/*
* Copyright (c) 2002, The Tendra Project <http://www.ten15.org/>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*
* Crown Copyright (c) 1997
*
* This TenDRA(r) Computer Program is subject to Copyright
* owned by the United Kingdom Secretary of State for Defence
* acting through the Defence Evaluation and Research Agency
* (DERA). It is made available to Recipients with a
* royalty-free licence for its use, reproduction, transfer
* to other parties and amendment for any purpose not excluding
* product development provided that any such use et cetera
* shall be deemed to be acceptance of the following conditions:-
*
* (1) Its Recipients shall ensure that this Notice is
* reproduced upon any copies or amended versions of it;
*
* (2) Any amended version of it shall be clearly marked to
* show both the nature of and the organisation responsible
* for the relevant amendment or amendments;
*
* (3) Its onward transfer from a recipient to another
* party shall be deemed to be that party's acceptance of
* these conditions;
*
* (4) DERA gives no warranty or assurance as to its
* quality or suitability for any purpose and DERA accepts
* no liability whatsoever in relation to any use to which
* it may be put.
*
* $TenDRA: tendra/src/tools/tspec/lex.c,v 1.12 2004/08/08 08:50:20 stefanf Exp $
*/
#include "config.h"
#include "cstring.h"
#include "msgcat.h"
#include "object.h"
#include "hash.h"
#include "lex.h"
#include "name.h"
#include "syntax.h"
#include "type.h"
#include "utility.h"
/*
 * CREATE A KEYWORD
 *
 * Builds an OBJ_KEYWORD object named nm, stores the lexical token value
 * t in it, and enters the object into the keywords hash table.
 */
static void
make_keyword(char *nm, int t)
{
    object *keyword = make_object (nm, OBJ_KEYWORD);

    keyword->u.u_num = t;
    IGNORE add_hash (keywords, keyword, no_version);
}
/*
 * INITIALISE KEYWORDS
 *
 * Populates the hash table of keywords.  keyword.h is included with
 * MAKE_KEYWORD defined, so every MAKE_KEYWORD use in that header
 * expands into a call to make_keyword.
 */
void
init_keywords(void)
{
#define MAKE_KEYWORD(NAME, LEX) make_keyword (NAME, LEX)
#include "keyword.h"
}
/*
 * CURRENT LEXICAL TOKEN
 *
 * These variables are used to store the value of the current lexical
 * token.
 */
/* Number of the current lexical token; starts as lex_unknown. */
int crt_lex_token = lex_unknown;
/*
 * A second token slot, also starting as lex_unknown.
 * NOTE(review): not referenced by the routines in this file; presumably
 * used by the parser to hold a token aside - confirm against the parser.
 */
int saved_lex_token = lex_unknown;
/*
 * Text associated with the token most recently read.  The read_*
 * routines below set it either to a fresh copy of the token text or,
 * when preprocessing, to the shared buffer itself.
 */
char *token_value = null;
/*
 * INPUT FILE
 *
 * The variable input_file gives the file from which the input is read.
 * The input_pending variable is used to unread one character; it holds
 * LEX_EOF when no character is pending.
 */
FILE *input_file;
int input_pending = LEX_EOF;
/*
 * READ A CHARACTER FROM THE INPUT FILE
 *
 * Returns the next input character.  A character pushed back through
 * input_pending is returned first and the slot is cleared.  Otherwise a
 * character is fetched from input_file, line_no is advanced when it is
 * a newline, and the value is masked to eight bits.  LEX_EOF is
 * returned at end of file.
 */
static int
read_char(void)
{
    int pending = input_pending;
    int ch;

    if (pending != LEX_EOF) {
        /* Hand back the unread character and empty the slot */
        input_pending = LEX_EOF;
        return (pending);
    }
    ch = fgetc (input_file);
    if (ch == EOF) return (LEX_EOF);
    if (ch == '\n') line_no++;
    return (ch & 0xff);
}
/*
 * MAPPINGS OF LEXICAL ANALYSER ROUTINES
 *
 * These macros give the mappings from the lexical analyser to the
 * routines defined in this module.  The reader routines are declared
 * first because they are defined after the point where the macros are
 * used.
 */
static int read_identifier(int, int, int);
static int read_number(int, int);
static int read_string(int);
static int read_insert(int);
static int read_c_comment(int);
static int read_comment(int);
/* Push one character back; read_char returns it on its next call. */
#define unread_char(A) input_pending = (A)
/*
 * Identifier readers.  get_global passes 0 as the prefix; the other
 * three pass their first argument as the prefix character.  All pass
 * 0 as the final (preprocessing) flag.
 */
#define get_global(A) read_identifier (0, (A), 0)
#define get_local(A, B) read_identifier ((A), (B), 0)
#define get_command(A, B) read_identifier ((A), (B), 0)
#define get_variable(A, B) read_identifier ((A), (B), 0)
#define get_number(A) read_number ((A), 0)
/*
 * The remaining readers ignore the characters already consumed by the
 * lexer (the macro arguments are unused) and are called with the
 * preprocessing flag clear.
 */
#define get_string(A) read_string (0)
#define get_comment(A) read_comment (0)
#define get_c_comment(A, B) read_c_comment (0)
#define get_text(A, B) read_insert (0)
/* Any other character maps straight to the lex_unknown token. */
#define unknown_token(A) lex_unknown
/*
* INCLUDE THE LEXICAL ANALYSER
*
* The automatically generated lexical analyser is included at this
* point. It defines the routine read_token which reads the next
* lexical token from the input file.
*/
#include "lexer.h"
/*
* READ AN IDENTIFIER NAME
*
* This routine reads an identifier name from the input file. It is
* entered after the first character, b, has been read. a gives the
* identifier prefix, '+' for commands, '$' for variables, '~' for
* local identifiers, and 0 for normal identifiers.
*/
static int
read_identifier(int a, int b, int pp)
{
int c;
object *p;
int i = 0;
char *s = buffer;
if (a) s [i++] = (char) a;
s [i++] = (char) b;
for (;;) {
c = read_char ();
if (!is_alphanum (lookup_char (c))) break;
s [i] = (char) c;
if (++i >= buffsize) {
MSG_identifier_too_long ();
i = 1;
}
}
unread_char (c);
s [i] = 0;
p = search_hash (keywords, s, no_version);
if (p) return (p->u.u_num);
token_value = s;
if (a == 0) {
if (!pp) token_value = string_copy (s);
return (lex_name);
}
if (a == '$') {
if (!pp) token_value = string_copy (s);
return (lex_variable);
}
if (a == '+') {
/* Commands */
if (!pp) token_value = string_copy (s);
MSG_unknown_command (s);
return (lex_name);
}
token_value = string_concat (HIDDEN_NAME, s + 1);
return (lex_name);
}
/*
 * READ A NUMBER
 *
 * Reads a run of digits into the shared buffer.  On entry the initial
 * character, a, has already been read.  token_value is set to the
 * buffer itself when pp is non-zero and to a copy of it otherwise.
 * Always returns lex_number.
 */
static int
read_number(int a, int pp)
{
    int ch;
    int len = 0;
    char *digits = buffer;

    digits [len++] = (char) a;
    while (ch = read_char (), is_digit (lookup_char (ch))) {
        digits [len] = (char) ch;
        len++;
        if (len >= buffsize) {
            /* Report the overflow and start refilling the buffer */
            MSG_number_too_long ();
            len = 0;
        }
    }
    unread_char (ch);
    digits [len] = 0;

    token_value = (pp ? digits : string_copy (digits));
    return (lex_number);
}
/*
 * READ A STRING
 *
 * Reads a string literal into the shared buffer.  On entry the opening
 * quote has already been read; reading stops at the closing quote.
 *
 * When pp is non-zero (preprocessing) backslash escapes are kept
 * verbatim and token_value points at the buffer.  Otherwise the escapes
 * \n, \r and \t are translated, any other escaped character stands for
 * itself, and token_value is a copy of the buffer.
 *
 * A newline or end of file inside the string is reported and ends the
 * string; the text read so far is still delivered through token_value.
 * Always returns lex_string.
 */
static int
read_string(int pp)
{
    int c;
    int i = 0;
    char *s = buffer;

    for (;;) {
        int unterminated = 0;

        c = read_char ();
        if (c == '"') {
            /* End of string */
            break;
        }
        if (c == '\\') {
            /* Deal with escaped characters */
            c = read_char ();
            if (c == '\n' || c == LEX_EOF) {
                unterminated = 1;
            } else if (pp) {
                /*
                 * Preserve escapes when preprocessing.  The backslash
                 * needs its own bounds check, otherwise the store of
                 * the escaped character below could land one past the
                 * end of the buffer.
                 */
                s [i] = '\\';
                if (++i >= buffsize) {
                    MSG_string_too_long ();
                    i = 0;
                }
            } else {
                /* Examine escape sequence */
                switch (c) {
                    case 'n' : c = '\n'; break;
                    case 'r' : c = '\r'; break;
                    case 't' : c = '\t'; break;
                    default : break;
                }
            }
        } else if (c == '\n' || c == LEX_EOF) {
            unterminated = 1;
        }
        if (unterminated) {
            /*
             * Report the error, then fall through to the common exit so
             * that token_value is never left stale.
             */
            MSG_new_line_in_string ();
            break;
        }
        s [i] = (char) c;
        if (++i >= buffsize) {
            MSG_string_too_long ();
            i = 0;
        }
    }
    s [i] = 0;
    if (pp) {
        token_value = s;
    } else {
        token_value = string_copy (s);
    }
    return (lex_string);
}
/*
 * READ A SECTION OF QUOTED TEXT
 *
 * Reads text quoted by a run of percent signs into the shared buffer.
 * On entry two percents have been read; any further percents are
 * counted, then text is read until a run of the same number of
 * percents is met.
 *
 * When pp is non-zero the opening and closing percents are kept in the
 * buffer and token_value points at it.  Otherwise the closing percents
 * and surrounding white space are removed and token_value is a copy.
 *
 * Returns lex_build_Hinsert for an odd number of percents, lex_insert
 * for an even number, or lex_eof (after a report) if the file ends
 * first.
 */
static int
read_insert(int pp)
{
    int ch;
    int len = 0;
    int run = 0;
    int quote = 2;
    char *text = buffer;

    /* Count the rest of the opening percents */
    for (;;) {
        ch = read_char ();
        if (ch != '%') break;
        quote++;
    }
    unread_char (ch);

    if (pp) {
        /* Preserve percents when preprocessing */
        if (quote >= buffsize) {
            MSG_insert_too_long ();
        } else {
            while (len < quote) text [len++] = '%';
        }
    }

    /* Copy text until a matching run of percents has been seen */
    while (run != quote) {
        ch = read_char ();
        if (ch == '%') {
            run++;
        } else if (ch == LEX_EOF) {
            MSG_end_of_file_in_quoted_text ();
            return (lex_eof);
        } else {
            run = 0;
        }
        text [len] = (char) ch;
        len++;
        if (len >= buffsize) {
            MSG_insert_too_long ();
            len = 0;
        }
    }

    if (pp) {
        /* Preserve percents when preprocessing */
        text [len] = 0;
        token_value = text;
    } else {
        int start = 0;

        /* Drop the closing percents, then trailing white space */
        if (len >= run) len -= run;
        text [len] = 0;
        for (len--; len >= 0; len--) {
            int kind = lookup_char (((int) text [len]) & 0xff);
            if (!is_white (kind)) break;
            text [len] = 0;
        }

        /* Step over leading white space */
        for (;;) {
            int kind = lookup_char (((int) text [start]) & 0xff);
            if (!is_white (kind)) break;
            start++;
        }
        token_value = string_copy (text + start);
    }

    if (quote % 2) return (lex_build_Hinsert);
    return (lex_insert);
}
/*
 * READ A C COMMENT
 *
 * This routine reads a C-style comment into the buffer. The routine is
 * entered just after the initial / * has been read, and continues until
 * the corresponding * /.  The buffer always starts with the opening
 * two characters.
 *
 * token_value is set to the buffer itself when pp is non-zero and to a
 * copy otherwise.  Returns lex_comment, or lex_eof (after a report) if
 * the file ends inside the comment.
 */
static int
read_c_comment(int pp)
{
    int c;
    int i = 2;
    int p = 0;    /* 0 = nothing, 1 = last character was '*', 2 = closed */
    char *s = buffer;

    s [0] = '/';
    s [1] = '*';
    do {
        c = read_char ();
        if (c == LEX_EOF) {
            MSG_end_of_file_in_comment ();
            return (lex_eof);
        }
        if (c == '*') {
            /*
             * Any star may begin the terminator, including one that
             * directly follows another star; resetting here would make
             * a comment ending in two stars and a slash unterminated.
             */
            p = 1;
        } else if (c == '/' && p == 1) {
            p = 2;
        } else {
            p = 0;
        }
        s [i] = (char) c;
        if (++i >= buffsize) {
            /* Keep the opening two characters and refill after them */
            MSG_comment_too_long ();
            i = 2;
        }
    } while (p != 2);
    s [i] = 0;
    if (pp) {
        token_value = s;
    } else {
        token_value = string_copy (s);
    }
    return (lex_comment);
}
/*
 * READ A TSPEC COMMENT
 *
 * Steps over a tspec comment.  On entry the initial '#' has been read;
 * input is discarded up to and including the end of the line.  End of
 * file inside the comment is reported and gives lex_eof.  Otherwise
 * lex_unknown is returned when pp is non-zero, and the next token from
 * read_token when it is zero.
 */
static int
read_comment(int pp)
{
    for (;;) {
        int ch = read_char ();
        if (ch == '\n') break;
        if (ch == LEX_EOF) {
            MSG_end_of_file_in_comment ();
            return (lex_eof);
        }
    }
    return (pp ? lex_unknown : read_token ());
}
/*
 * READ A PREPROCESSING TOKEN
 *
 * A stripped down version of read_token used in preprocessing.  Initial
 * white space is skipped if w is true.  The text of the token is left
 * in the shared buffer.  Strings, quoted text, '+' commands and C
 * comments are handed to their reader routines with the preprocessing
 * flag set.  A '#' comment is skipped and, when w is false, stands for
 * a single newline.  Any other character becomes a one-character token,
 * which is lex_unknown unless it is a recognised punctuator or end of
 * file.
 */
static int
read_pptoken(int w)
{
    int ch;
    int next;
    int tok = lex_unknown;

    /* Fetch a character, stepping over white space only if asked to */
    for (;;) {
        ch = read_char ();
        if (!w) break;
        if (!is_white (lookup_char (ch))) break;
    }

    switch (ch) {
        case '"' : {
            return (read_string (1));
        }
        case '#' : {
            IGNORE read_comment (1);
            if (w) return (read_pptoken (w));
            ch = '\n';
            break;
        }
        case '%' :
        case '+' :
        case '/' :
        case ':' : {
            /* These four need one character of lookahead */
            next = read_char ();
            if (ch == '%') {
                if (next == '%') return (read_insert (1));
            } else if (ch == '+') {
                if (is_alpha (lookup_char (next))) {
                    return (read_identifier (ch, next, 1));
                }
            } else if (ch == '/') {
                if (next == '*') return (read_c_comment (1));
            } else if (next == '=') {
                buffer [0] = (char) ch;
                buffer [1] = (char) next;
                buffer [2] = 0;
                return (lex_assign);
            }
            /* Not a two-character token: put the lookahead back */
            unread_char (next);
            break;
        }
        case '(' : tok = lex_open_Hround; break;
        case ')' : tok = lex_close_Hround; break;
        case '{' : tok = lex_open_Hbrace; break;
        case '}' : tok = lex_close_Hbrace; break;
        case ';' : tok = lex_semicolon; break;
        case ',' : tok = lex_comma; break;
        case LEX_EOF : tok = lex_eof; break;
    }

    buffer [0] = (char) ch;
    buffer [1] = 0;
    return (tok);
}
/*
 * READ A PREPROCESSING STRING
 *
 * Reads a string, optionally enclosed in round brackets, followed by
 * one further token.  A copy of the string is stored in *str and the
 * following token is returned.  *b is set to true if the string is
 * enclosed in brackets; it is never cleared here, so the caller must
 * initialise it.  If no string is found the error is reported, *str is
 * set to "???" and the offending token is returned.
 */
static int
read_pp_string(char **str, int *b)
{
    int tok = read_pptoken (1);

    if (tok == lex_open_Hround) {
        *b = 1;
        tok = read_pptoken (1);
    }

    if (tok == lex_string) {
        *str = string_copy (buffer);
        tok = read_pptoken (1);
        if (*b) {
            /* A bracketed string must be closed before the next token */
            if (tok != lex_close_Hround) {
                MSG_close_round_expected ();
            }
            tok = read_pptoken (1);
        }
    } else {
        MSG_string_expected ();
        *str = "???";
    }
    return (tok);
}
/*
* PRINT A SUBSET NAME
*
* This routine prints the command cmd "api", "file", "subset" to the
* file output.
*/
static void
print_subset_name(FILE *output, char *cmd, char *api, char *file,
char *subset, int b)
{
if (b) {
IGNORE fprintf (output, "%s (\"%s\")", cmd, api);
} else {
IGNORE fprintf (output, "%s \"%s\"", cmd, api);
}
if (file) IGNORE fprintf (output, ", \"%s\"", file);
if (subset) {
if (file == null) IGNORE fputs (", \"\"", output);
IGNORE fprintf (output, ", \"%s\"", subset);
}
return;
}
/*
 * PRINT THE CURRENT FILE POSITION
 *
 * Writes position directives to the file output.  A $FILE directive is
 * written only when filename differs from the name written by the
 * previous call; a $LINE directive giving line_no - 1 is always
 * written.
 */
static void
print_posn(FILE *output)
{
    static char *reported = "";
    int changed = !streq (filename, reported);

    if (changed) {
        IGNORE fprintf (output, "$FILE = \"%s\";\n", filename);
        reported = filename;
    }
    IGNORE fprintf (output, "$LINE = %d;\n", line_no - 1);
}
/*
 * PREPROCESS A SUBFILE
 *
 * Handles a +IMPLEMENT or +USE directive (named by cmd) read from the
 * input file.  The api, file and subset strings are parsed, the named
 * subset is preprocessed into output, and the directive is then
 * written to output again, followed by the ';' or '(' that ended it.
 * Only the api string may be enclosed in brackets.
 */
static void
preproc_subfile(FILE *output, char *cmd)
{
    char *api = null;
    char *file = null;
    char *subset = null;
    int api_bracketed = 0;
    int tok;
    int txt = ';';

    tok = read_pp_string (&api, &api_bracketed);
    if (tok == lex_comma) {
        int bracketed = 0;

        tok = read_pp_string (&file, &bracketed);
        if (bracketed) {
            MSG_illegally_bracketed_string ();
            bracketed = 0;
        }
        if (tok == lex_comma) {
            tok = read_pp_string (&subset, &bracketed);
            if (bracketed) MSG_illegally_bracketed_string ();
        }
        /* An empty file name means that no file was given */
        if (*file == 0) file = null;
    }

    /* Anything other than ';' or '(' is reported and treated as ';' */
    if (tok != lex_semicolon) {
        if (tok == lex_open_Hround) {
            txt = '(';
        } else {
            MSG_semicolon_or_open_round_expected ();
        }
    }

    preproc (output, api, file, subset);
    print_posn (output);
    print_subset_name (output, cmd, api, file, subset, api_bracketed);
    IGNORE fputc (' ', output);
    IGNORE fputc (txt, output);
}
/*
 * PREPROCESS A FILE
 *
 * This routine preprocesses the subset api:file:subset into output.
 *
 * output is the stream the preprocessed text is written to; api names
 * the API; file is the file within it (null selects MASTER_FILE);
 * subset names a subset within that file (null selects the whole
 * file).
 *
 * The output is wrapped as  +SET "api", "file", "subset" := { ... };
 * A subset which has already been processed, or is currently being
 * processed, is not processed again.  The routine calls itself for
 * nested +SUBSET definitions and, through preproc_subfile, for
 * +IMPLEMENT and +USE directives; the global lexer state (filename,
 * line_no, input_file, input_pending) is saved on entry and restored
 * on exit.
 */
void
preproc(FILE *output, char *api, char *file, char *subset)
{
int c;
char *s;
object *p;
char *sn, *nm;
FILE *old_file;
int old_pending;
int old_line_no;
char *old_filename;
/* Set once the requested subset has been seen in the file */
boolean found = 0;
/* Current brace nesting depth */
int brackets = 0;
/* Brace depth recorded when the requested subset was found */
int end_brackets = 0;
int if_depth = 0;
int else_depth = 0;
FILE *input = null;
/* A whole file is printed from the start; a subset only once found */
boolean printing = (boolean) (subset ? 0 : 1);
/* Check for previous inclusion */
sn = subset_name (api, file, subset);
p = search_hash (subsets, sn, no_version);
if (p != null) {
if (p->u.u_info == null) {
/* The entry is still marked as in progress (see below) */
MSG_recursive_inclusion (sn);
} else if (p->u.u_info->implemented) {
/* The flag is set below when the file or subset was not found */
MSG_set_not_found (sn);
}
return;
}
/* Open the input file */
nm = (file ? file : MASTER_FILE);
if (!streq (api, LOCAL_API)) {
nm = string_printf ("%s/%s", api, nm);
}
/*
 * Try each ':'-separated component of input_dir in turn.
 * NOTE(review): the sprintf and strcpy calls below write into buffer
 * without any length check against buffsize.
 */
s = input_dir;
while (s) {
char *t = strchr (s, ':');
if (t == null) {
/* Last component */
IGNORE sprintf (buffer, "%s/%s", s, nm);
s = null;
} else {
/* Copy the remaining path, then overwrite from the ':' onwards */
IGNORE strcpy (buffer, s);
IGNORE sprintf (buffer + (t - s), "/%s", nm);
s = t + 1;
}
input = fopen (buffer, "r");
if (input) {
nm = string_copy (buffer);
break;
}
}
if (input == null) {
/* Fall back to the name with no directory prefix */
input = fopen (nm, "r");
if (input == null) {
/* Record the failure so that later requests are not retried */
MSG_set_not_found_no_file (sn, nm);
p = make_object (sn, OBJ_SUBSET);
IGNORE add_hash (subsets, p, no_version);
p->u.u_info = make_info (api, file, subset);
p->u.u_info->implemented = 1;
return;
}
}
if (verbose > 1) {
if (subset) {
IGNORE printf ("Preprocessing %s [%s] ...\n", nm, subset);
} else {
IGNORE printf ("Preprocessing %s ...\n", nm);
}
}
/* Save the lexer state and switch it to the new file */
old_filename = filename;
old_line_no = line_no;
old_file = input_file;
old_pending = input_pending;
filename = nm;
line_no = 1;
input_file = input;
input_pending = LEX_EOF;
/* Register the subset; a null u_info marks it as in progress */
p = make_object (sn, OBJ_SUBSET);
p->u.u_info = null;
IGNORE add_hash (subsets, p, no_version);
/* Print position identifier */
print_subset_name (output, "+SET", api, file, subset, 0);
IGNORE fputs (" := {\n", output);
if (printing) print_posn (output);
/* Process the input */
while (c = read_pptoken (0), c != lex_eof) {
switch (c) {
case lex_subset : {
/* Deal with subsets */
int d = 0;
c = read_pp_string (&s, &d);
if (d) MSG_illegally_bracketed_string ();
if (c != lex_assign) {
MSG_assign_expected ();
}
c = read_pptoken (1);
if (c != lex_open_Hbrace) {
MSG_open_hbrace_expected ();
}
brackets++;
if (printing) {
/*
 * A nested subset inside printed text: preprocess it as a set
 * of its own (written to output at this point), refer to it
 * with +IMPLEMENT, then skip its body in this pass.
 */
int b = brackets;
char *cmd = "+IMPLEMENT";
preproc (output, api, file, s);
print_subset_name (output, cmd, api, file, s, 0);
IGNORE fputs (";\n", output);
/* Skip tokens until the subset's opening brace is closed */
do {
c = read_pptoken (0);
if (c == lex_open_Hbrace) {
brackets++;
} else if (c == lex_close_Hbrace) {
brackets--;
} else if (c == lex_eof) {
MSG_cant_find_end_of_subset (s);
goto end_of_file;
}
} while (brackets >= b);
c = read_pptoken (1);
if (c != lex_semicolon) {
MSG_semicolon_expected ();
}
print_posn (output);
} else {
/* Not printing: subset is non-null here, so look for a match */
if (streq (s, subset)) {
if (found) {
MSG_set_already_defined_at (sn, p->line_no);
} else {
/* Start printing; stop when the depth falls below this */
found = 1;
printing = 1;
print_posn (output);
p->line_no = line_no;
end_brackets = brackets;
}
}
}
break;
}
case lex_implement : {
/* Deal with subset uses */
if (printing) preproc_subfile (output, "+IMPLEMENT");
break;
}
case lex_use : {
/* Deal with subset uses */
if (printing) preproc_subfile (output, "+USE");
break;
}
case lex_set : {
/* Deal with sets */
MSG_pset_directive_in_preprocessor ();
goto default_lab;
}
/*
 * Conditionals are only checked for balance; their text is passed
 * through unchanged.
 * NOTE(review): else_depth is a single flag reset by every
 * +IF and +ENDIF, so a duplicate +ELSE in an outer conditional that
 * follows a nested one is not detected.
 */
case lex_if :
case lex_ifdef :
case lex_ifndef : {
if_depth++;
else_depth = 0;
goto default_lab;
}
case lex_else : {
if (if_depth == 0) {
MSG_pelse_without_pif ();
} else {
if (else_depth) {
MSG_duplicate_pelse ();
}
else_depth = 1;
}
goto default_lab;
}
case lex_endif : {
if (if_depth == 0) {
MSG_pendif_without_pif ();
} else {
if_depth--;
}
else_depth = 0;
goto default_lab;
}
case lex_string : {
/* Deal with strings */
if (printing) {
/* The buffer holds the string without its enclosing quotes */
IGNORE fprintf (output, "\"%s\"", buffer);
}
break;
}
case lex_open_Hbrace : {
/* Start of subset */
brackets++;
goto default_lab;
}
case lex_close_Hbrace : {
/* End of subset */
brackets--;
if (brackets < 0) {
MSG_unmatched_close_hbrace ();
brackets = 0;
}
/* Leaving the requested subset: stop printing (brace included) */
if (subset && brackets < end_brackets) {
printing = 0;
}
goto default_lab;
}
default :
default_lab : {
/* Deal with simple tokens */
if (printing) IGNORE fputs (buffer, output);
break;
}
}
}
/* End of file */
end_of_file : {
if (brackets) {
MSG_bracket_imbalance_of (brackets);
}
/* Report each conditional which is still open */
while (if_depth) {
MSG_pif_without_pendif ();
if_depth--;
}
IGNORE fputs ("};\n", output);
IGNORE fclose (input);
/* Mark the subset as completed and restore the lexer state */
p->u.u_info = make_info (api, file, subset);
filename = old_filename;
line_no = old_line_no;
input_file = old_file;
input_pending = old_pending;
if (subset && !found) {
MSG_set_not_found_no_subset (sn, subset);
p->u.u_info->implemented = 1;
}
return;
}
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */