/*
* Copyright (c) 2002, The Tendra Project <http://www.ten15.org/>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*
* Crown Copyright (c) 1997
*
* This TenDRA(r) Computer Program is subject to Copyright
* owned by the United Kingdom Secretary of State for Defence
* acting through the Defence Evaluation and Research Agency
* (DERA). It is made available to Recipients with a
* royalty-free licence for its use, reproduction, transfer
* to other parties and amendment for any purpose not excluding
* product development provided that any such use et cetera
* shall be deemed to be acceptance of the following conditions:-
*
* (1) Its Recipients shall ensure that this Notice is
* reproduced upon any copies or amended versions of it;
*
* (2) Any amended version of it shall be clearly marked to
* show both the nature of and the organisation responsible
* for the relevant amendment or amendments;
*
* (3) Its onward transfer from a recipient to another
* party shall be deemed to be that party's acceptance of
* these conditions;
*
* (4) DERA gives no warranty or assurance as to its
* quality or suitability for any purpose and DERA accepts
* no liability whatsoever in relation to any use to which
* it may be put.
*
* $TenDRA: tendra/src/tools/tnc/read.c,v 1.7 2005/09/21 16:59:15 stefanf Exp $
*/
#include "config.h"
#include "cstring.h"
#include "msgcat.h"
#include "types.h"
#include "read_types.h"
#include "analyser.h"
#include "check.h"
#include "high.h"
#include "names.h"
#include "node.h"
#include "read.h"
#include "shape.h"
#include "table.h"
#include "tdf.h"
#include "utility.h"
/*
* ARE MULTIBYTE STRINGS ALLOWED
*
* This flag is true to indicate that multibyte strings (other than
* 8 bits per character) are allowed.
*/
boolean allow_multibyte = 1;
/*
* READ A TOKEN APPLICATION
*
* A token application of sort s is read and appended to p.
*/
void
read_token(node *p, sortname s)
{
char *ra;
char *wtemp;
sortname rs;
construct *v;
tok_info *info;
boolean in_brackets = 0;
/* Check bracket (1) */
read_word ();
if (!func_input && word_type == INPUT_OPEN) {
in_brackets = 1;
read_word ();
}
/* Read token identifier */
if (word_type != INPUT_WORD)
MSG_FATAL_token_identifier_expected ();
/* Check bracket (2) */
if (func_input) {
wtemp = temp_copy (word);
read_word ();
if (word_type == INPUT_OPEN) {
in_brackets = 1;
} else {
looked_ahead = 1;
}
} else {
wtemp = word;
}
/* Look up token */
v = search_var_hash (wtemp, SORT_token);
if (v == null)
MSG_FATAL_token_not_declared (wtemp);
info = get_tok_info (v);
rs = info->res;
ra = info->args;
if (rs == SORT_unknown)
MSG_FATAL_token_not_declared (wtemp);
if (is_high (rs)) {
high_sort *h = high_sorts + high_no (rs);
rs = h->res;
ra = find_decode_string (h);
}
if (rs != s)
MSG_FATAL_token_returns_wrong_sort (wtemp,
sort_name (rs), sort_name (s));
adjust_token (v);
/* Decode arguments */
p->son = new_node ();
p->son->cons = v;
if (ra) p->son->son = read_node (ra);
/* Check end */
if (in_brackets) {
read_word ();
if (word_type != INPUT_CLOSE) {
MSG_end_of_token_construct_expected (v->name);
looked_ahead = 1;
}
} else {
if (p->son->son) {
MSG_token_construct_should_be_in_brackets (v->name);
}
}
if (do_check) IGNORE set_token_args (info->pars, p->son->son, 0);
return;
}
/*
* READ A TOKEN NAME
*
* This routine reads a token name (as opposed to a token application).
* The token should have sort s.
*/
static node *
read_token_name(sortname s)
{
node *p;
boolean ok = 0;
construct *v;
high_sort *h;
tok_info *info;
/* Read token identifier */
read_word ();
if (word_type != INPUT_WORD)
MSG_FATAL_token_identifier_expected ();
/* Look up token */
v = search_var_hash (word, SORT_token);
if (v == null)
MSG_FATAL_token_not_declared (word);
info = get_tok_info (v);
/* Check consistency */
h = high_sorts + high_no (s);
if (h->res == info->res) {
if (info->args == null) {
if (h->no_args == 0) ok = 1;
} else if (h->no_args) {
char *ha = find_decode_string (h);
if (streq (info->args, ha)) ok = 1;
}
} else if (h->id == info->res) {
if (info->args == null) ok = 1;
}
if (!ok)
MSG_FATAL_token_has_incorrect_sort (v->name);
/* Return the construct */
p = new_node ();
p->cons = v;
if (!text_output) {
p->son = new_node ();
p->son->cons = &token_cons;
}
return (p);
}
/*
* FIND BASIC CONSTRUCT FOR A VARIABLE SORT
*
* This routine returns the construct for turning an identifier into
* an object of sort s.
*/
static long
make_obj(sortname s)
{
long mk = -1;
switch (s) {
case SORT_al_tag : mk = ENC_make_al_tag ; break;
case SORT_label : mk = ENC_make_label ; break;
case SORT_tag : mk = ENC_make_tag ; break;
}
return (mk);
}
/*
* IS A VARIABLE SORT A USE OR AN INTRODUCTION?
*
* This flag is true to indicate that the tag (or whatever) being read
* is a new one being introduced rather than an old one being used.
* The flag intro_tag_var is set to indicate that any tag so introduced
* is a variable. The flag intro_visible is set to true whenever the
* visible access specifier is read.
*/
static boolean intro_var = 0;
static boolean intro_tag_var = 0;
boolean intro_visible = 0;
/*
* SEARCH FOR A VARIABLE SORT
*
* This routine initializes, if appropriate, and returns the construct
* corresponding to the object named nm of sort s.
*/
static construct *
search_var_sort(char *nm, sortname s)
{
construct *v = search_var_hash (nm, s);
if (intro_var) {
if (v == null) {
v = make_construct (s);
v->name = string_copy (nm);
/* Don't add to hash table yet */
if (s == SORT_tag) {
tag_info *info = get_tag_info (v);
info->var = intro_tag_var;
info->vis = intro_visible;
intro_visible = 0;
}
} else {
MSG_FATAL_object_already_in_scope (sort_name (s), nm);
}
} else {
if (v == null) {
if (!dont_check)
MSG_object_not_in_scope (sort_name (s), nm);
v = make_construct (s);
v->name = string_copy (nm);
IGNORE add_to_var_hash (v, s);
}
}
return (v);
}
/*
* READ A VARIABLE SORT
*
* An identifier representing a construct of sort s is read.
*/
node *
read_var_sort(sortname s)
{
node *p;
construct *v;
read_word ();
if (word_type != INPUT_WORD)
MSG_FATAL_sort_identifier_expected (sort_name (s));
v = search_var_sort (word, s);
p = new_node ();
p->cons = v;
return (p);
}
/*
* READ A SEQUENCE EXPRESSION
*
* A sequence expression is read. This is tricky because it is a list
* of exps followed by an exp, which may be read as a list of exps.
*/
void
read_seq_node(node *p)
{
node *q = read_node ("*[x]?[x]");
if (q->bro->son) {
node *r = q->bro->son;
q->bro = r;
p->son = q;
return;
}
q->bro = null;
if (q->cons->encoding == 0) {
MSG_exp_expected ();
return;
}
(q->cons->encoding)--;
p->son = q;
q = q->son;
if (q->bro == null) {
p->son->son = null;
p->son->bro = q;
} else {
while (q->bro->bro) q = q->bro;
p->son->bro = q->bro;
q->bro = null;
}
return;
}
/*
* READ SORT INDICATED BY A SINGLE DECODE LETTER
*
* An object with sort given by the decode letter str is read. If the next
* object is not of this sort then either an error is flagged (if strict
* is true) or null is returned.
*/
static node *
read_node_aux(char *str, int strict)
{
sortname s;
char *wtemp;
node *p, *ps;
construct *cons;
read_func fn = null;
boolean in_brackets = 0;
/* Find the corresponding sort name */
if (str [1] == '&') {
/* Introduced variable */
intro_var = 1;
intro_tag_var = 1;
} else if (str [1] == '^') {
/* Introduced identity */
intro_var = 1;
intro_tag_var = 0;
}
switch (str [0]) {
case 'i' : {
s = SORT_tdfint;
break;
}
case 'j' : {
s = SORT_tdfbool;
break;
}
case '$' : {
s = SORT_tdfstring;
break;
}
case 'F' : {
s = SORT_unknown;
break;
}
default : {
s = find_sort (str [0]);
fn = sort_read [s];
break;
}
}
/* Read the next word */
read_word ();
/* Check for blanks */
if (word_type == INPUT_BLANK && !strict) {
word_type = INPUT_BLANK_FIRST;
return (null);
}
/* Check for bars */
if (word_type == INPUT_BAR && !strict) {
word_type = INPUT_BAR_FIRST;
return (null);
}
/* Deal with strings */
if (s == SORT_tdfstring) {
if (word_type == INPUT_STRING) {
p = new_node ();
p->cons = new_construct ();
p->cons->sortnum = SORT_tdfstring;
p->cons->encoding = word_length;
p->cons->name = string_ncopy (word, (int) word_length);
p->cons->next = null;
return (p);
} else {
boolean is_multibyte = 0;
if (func_input) {
if (word_type == INPUT_WORD) {
if (streq (word, MAKE_STRING)) {
read_word ();
if (word_type == INPUT_OPEN) is_multibyte = 1;
}
}
} else {
if (word_type == INPUT_OPEN) {
read_word ();
if (word_type == INPUT_WORD) {
if (streq (word, MAKE_STRING)) is_multibyte = 1;
}
}
}
if (is_multibyte) {
if (!allow_multibyte)
MSG_FATAL_multibyte_strings_not_allowed_here ();
p = new_node ();
p->cons = &string_cons;
p->son = read_node ("i*[i]");
read_word ();
if (word_type != INPUT_CLOSE) {
MSG_end_of_multibyte_string_expected ();
}
return (p);
}
}
if (strict) MSG_FATAL_string_expected ();
return (null);
}
/* Deal with numbers */
if (word_type == INPUT_NUMBER) {
boolean negate = 0;
if (*word == '-') {
word++;
negate = 1;
}
p = new_node ();
p->cons = new_construct ();
if (fits_ulong (word, 1)) {
p->cons->sortnum = SORT_small_tdfint;
p->cons->encoding = (long) octal_to_ulong (word);
} else {
p->cons->sortnum = SORT_tdfint;
p->cons->name = string_copy(word);
}
switch (s) {
case SORT_tdfint : {
if (negate) {
MSG_negative_nat ();
}
return (p);
}
case SORT_tdfbool : {
node *q = new_node ();
q->cons = (negate ? &true_cons : &false_cons);
q->bro = p;
return (q);
}
case SORT_nat : {
node *q = new_node ();
if (negate) {
MSG_negative_nat ();
}
q->cons = cons_no (SORT_nat, ENC_make_nat);
q->son = p;
return (q);
}
case SORT_signed_nat : {
node *q = new_node ();
q->cons = cons_no (SORT_signed_nat, ENC_make_signed_nat);
q->son = new_node ();
q->son->cons = (negate ? &true_cons : &false_cons);
q->son->bro = p;
return (q);
}
default : {
if (strict) MSG_FATAL_name_expected (sort_name (s));
return (null);
}
}
}
/* Deal with strings */
if (word_type == INPUT_STRING) {
if (s == SORT_string) {
node *q;
p = new_node ();
p->cons = new_construct ();
p->cons->sortnum = SORT_tdfstring;
p->cons->encoding = word_length;
p->cons->name = string_ncopy (word, (int) word_length);
p->cons->next = null;
q = new_node ();
q->cons = cons_no (SORT_string, ENC_make_string);
q->son = p;
return (q);
}
}
/* That was the last chance for numbers */
if (fn == null) {
if (strict) MSG_FATAL_number_expected ();
return (null);
}
/* Check for brackets (1) */
if (!func_input && word_type == INPUT_OPEN) {
in_brackets = 1;
read_word ();
}
/* The next word should be the identifier */
if (word_type != INPUT_WORD) {
if (strict) MSG_FATAL_name_expected (sort_name (s));
return (null);
}
/* Check for brackets (2) */
if (func_input) {
wtemp = temp_copy (word);
read_word ();
if (word_type == INPUT_OPEN) {
in_brackets = 1;
} else {
looked_ahead = 1;
}
} else {
wtemp = word;
}
if (s == SORT_string && streq (word, MAKE_STRING)) {
node *q;
p = new_node ();
p->cons = &string_cons;
p->son = read_node ("i*[i]");
read_word ();
if (word_type != INPUT_CLOSE) {
MSG_end_of_multibyte_string_expected ();
}
q = new_node ();
q->cons = cons_no (SORT_string, ENC_make_string);
q->son = p;
return (q);
}
/* Look up construct */
cons = search_cons_hash (wtemp, s);
if (cons) {
p = fn (cons->encoding);
ps = p->son;
} else {
boolean do_check_tag = 0;
if (!in_brackets && (s == SORT_al_tag || s == SORT_label ||
s == SORT_tag)) {
do_check_tag = 1;
}
/* Look up token */
cons = search_var_hash (wtemp, SORT_token);
if (cons) {
tok_info *info = get_tok_info (cons);
sortname rs = info->res;
char *ra = info->args;
if (rs == SORT_unknown) {
if (do_check_tag) goto check_lab;
MSG_FATAL_token_not_declared (wtemp);
}
if (is_high (rs)) {
high_sort *h = high_sorts + high_no (rs);
rs = h->res;
ra = find_decode_string (h);
}
if (rs != s) {
if (do_check_tag) goto check_lab;
if (!strict) return (null);
MSG_FATAL_token_returns_wrong_sort (wtemp,
sort_name (rs), sort_name (s));
}
adjust_token (cons);
p = new_node ();
p->cons = cons_no (s, sort_tokens [s]);
p->son = new_node ();
p->son->cons = cons;
if (ra) p->son->son = read_node (ra);
ps = p->son->son;
if (do_check) {
IGNORE set_token_args (info->pars, p->son->son, 0);
if (s == SORT_exp) check_exp (p);
}
} else {
/* Look up label, tag etc */
if (do_check_tag) {
check_lab : cons = search_var_sort (wtemp, s);
}
if (cons) {
long mk = make_obj (s);
p = new_node ();
p->cons = cons_no (s, mk);
p->son = new_node ();
p->son->cons = cons;
ps = null;
} else {
if (strict) {
MSG_FATAL_illegal_sort (sort_name (s), wtemp);
}
return (null);
}
}
}
/* Check end of construct */
if (in_brackets) {
read_word ();
if (word_type != INPUT_CLOSE) {
MSG_end_of_construct_expected (cons->name);
looked_ahead = 1;
}
} else {
if (ps) {
MSG_construct_should_be_in_brackets (cons->name);
}
}
return (p);
}
/*
* BRING VARIABLES INTO AND OUT OF SCOPE
*
* The tags, labels etc introduced in p are brought into (if end is
* false) or out of (if end is true) scope. This only works because
* all the constructs which introduce these variables are of a fairly
* simple form.
*/
static void
adjust_scope(node *p, int end)
{
node *p0 = p;
while (p) {
construct *v = p->cons;
sortname s = v->sortnum;
switch (s) {
case SORT_repeat :
case SORT_option : {
/* Scan repeated and optional arguments */
if (p->son) adjust_scope (p->son, end);
break;
}
case SORT_al_tag :
case SORT_label :
case SORT_tag : {
/* Variable found - adjust scope */
if (v->encoding == make_obj (s)) {
construct *u = p->son->cons;
if (end) {
if (s == SORT_tag) {
/* Visible tags aren't removed */
tag_info *info = get_tag_info (u);
if (info->vis) break;
}
remove_var_hash (u->name, s);
} else {
if (add_to_var_hash (u, s)) {
MSG_FATAL_object_already_in_scope (
sort_name (s), u->name);
}
if (do_check && s == SORT_tag) {
/* Fill in shape of tag */
node *ts;
node *p1 = p->bro;
tag_info *info = get_tag_info (u);
if (p1 && p1->cons->sortnum == SORT_exp) {
/* identity and variable have "t&x" */
ts = p1->shape;
} else if (p0->cons->sortnum == SORT_shape) {
/* make_proc etc have "S?[u]t&" */
ts = copy_node (p0);
} else {
/* don't know about other constructs */
ts = null;
}
/* Declaration = ?[u]?[X]S from 4.0 */
info->dec = new_node ();
info->dec->cons = &false_cons;
info->dec->bro = new_node ();
info->dec->bro->cons = &false_cons;
info->dec->bro->bro = ts;
}
}
}
break;
}
}
p = p->bro;
}
return;
}
/*
* CHECK FOR COMMA OR CLOSE BRACKET
*
* The next word should be a comma, which is stepped over, or a close
* bracket.
*/
static void
check_comma(void)
{
read_word ();
if (word_type == INPUT_COMMA) {
read_word ();
looked_ahead = 1;
if (word_type == INPUT_CLOSE) {
MSG_badly_placed_comma ();
}
return;
}
if (word_type != INPUT_CLOSE) {
MSG_comma_or_close_bracket_expected ();
}
looked_ahead = 1;
return;
}
/*
* READ SORTS GIVEN BY A STRING OF DECODE LETTERS
*
* A node corresponding to the decode string str is read from the
* input file.
*/
node *
read_node(char *str)
{
char c;
position store;
node *p, *q = null, *qe = null;
while (c = *str, (c != 0 && c != ']')) {
switch (c) {
case '{' : {
/* Start of scope */
adjust_scope (q, 0);
p = null;
break;
}
case '}' : {
/* End of scope */
adjust_scope (q, 1);
p = null;
break;
}
case '[' :
case '|' :
case '&' :
case '^' : {
/* Ignore these cases */
p = null;
break;
}
case '*' :
case '!' : {
/* Repeats */
char cr;
char *sr;
long n = 0;
int opt = 0;
node *pe = null, *pr;
str += 2;
cr = *str;
sr = str;
if (cr == '?') {
/* Allow for lists of options */
opt = 1;
str += 2;
cr = *str;
sr = skip_text (str);
}
if (cr == '*' || cr == '!') {
MSG_FATAL_lists_of_lists_not_implemented ();
} else if (cr == '?') {
MSG_FATAL_lists_of_options_not_implemented ();
}
p = new_node ();
p->cons = new_construct ();
p->cons->sortnum = SORT_repeat;
do {
store_position (&store);
pr = read_node_aux (sr, 0);
if (pr || (opt && word_type == INPUT_BLANK_FIRST)) {
if (func_input) check_comma ();
if (opt) {
/* Allow for optionals */
node *pt = pr;
if (pt && str [1] != ']') {
pt->bro = read_node (str + 1);
}
pr = new_node ();
pr->cons = &optional_cons;
pr->son = pt;
}
if (sr [1] != ']') {
pr->bro = read_node (sr + 1);
}
if (pe == null) {
p->son = pr;
} else {
pe->bro = pr;
}
pe = pr;
while (pe->bro) pe = pe->bro;
n++;
} else {
if (word_type == INPUT_BAR_FIRST) {
if (func_input) check_comma ();
} else if (c == '!' && n == 0 &&
word_type == INPUT_BLANK_FIRST) {
if (func_input) check_comma ();
} else {
set_position (&store);
}
}
} while (pr);
p->cons->encoding = n;
if (opt) sr++;
str = skip_text (sr);
if (c == '!') {
/* Optional repeats */
node *pt = p;
p = new_node ();
p->cons = &optional_cons;
if (n) p->son = pt;
}
break;
}
case '?' : {
/* Optionals */
node *po;
char co;
str += 2;
co = *str;
if (co == '*' || co == '!') {
MSG_FATAL_optional_lists_not_implemented ();
} else if (co == '?') {
MSG_FATAL_optional_options_not_implemented ();
}
intro_visible = 0;
store_position (&store);
po = read_node_aux (str, 0);
if (po) {
if (func_input) check_comma ();
if (str [1] != ']') {
po->bro = read_node (str + 1);
}
} else {
if (word_type == INPUT_BLANK_FIRST) {
if (func_input) check_comma ();
} else {
set_position (&store);
}
}
p = new_node ();
p->cons = &optional_cons;
p->son = po;
str = skip_text (str);
break;
}
case '@' : {
/* Conditionals */
str += 2;
p = new_node ();
p->cons = &bytestream_cons;
p->son = read_node (str);
str = skip_text (str);
break;
}
case 'T' : {
/* Tokens */
sortname sn;
str = find_sortname (str, &sn);
p = read_token_name (sn);
break;
}
default : {
/* Simple sort */
p = read_node_aux (str, 1);
if (func_input) check_comma ();
break;
}
}
if (p) {
if (qe == null) {
q = p;
} else {
qe->bro = p;
}
qe = p;
while (qe->bro) qe = qe->bro;
intro_var = 0;
}
str++;
}
return (q);
}
syntax highlighted by Code2HTML, v. 0.9.1