/* * Copyright (c) 2002, The Tendra Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * * Crown Copyright (c) 1997 * * This TenDRA(r) Computer Program is subject to Copyright * owned by the United Kingdom Secretary of State for Defence * acting through the Defence Evaluation and Research Agency * (DERA). It is made available to Recipients with a * royalty-free licence for its use, reproduction, transfer * to other parties and amendment for any purpose not excluding * product development provided that any such use et cetera * shall be deemed to be acceptance of the following conditions:- * * (1) Its Recipients shall ensure that this Notice is * reproduced upon any copies or amended versions of it; * * (2) Any amended version of it shall be clearly marked to * show both the nature of and the organisation responsible * for the relevant amendment or amendments; * * (3) Its onward transfer from a recipient to another * party shall be deemed to be that party's acceptance of * these conditions; * * (4) DERA gives no warranty or assurance as to its * quality or suitability for any purpose and DERA accepts * no liability whatsoever in relation to any use to which * it may be put. * * $TenDRA: tendra/src/tools/tnc/read.c,v 1.7 2005/09/21 16:59:15 stefanf Exp $ */ #include "config.h" #include "cstring.h" #include "msgcat.h" #include "types.h" #include "read_types.h" #include "analyser.h" #include "check.h" #include "high.h" #include "names.h" #include "node.h" #include "read.h" #include "shape.h" #include "table.h" #include "tdf.h" #include "utility.h" /* * ARE MULTIBYTE STRINGS ALLOWED * * This flag is true to indicate that multibyte strings (other than * 8 bits per character) are allowed. */ boolean allow_multibyte = 1; /* * READ A TOKEN APPLICATION * * A token application of sort s is read and appended to p. */ void read_token(node *p, sortname s) { char *ra; char *wtemp; sortname rs; construct *v; tok_info *info; boolean in_brackets = 0; /* Check bracket (1) */ read_word (); if (!func_input && word_type == INPUT_OPEN) { in_brackets = 1; read_word (); } /* Read token identifier */ if (word_type != INPUT_WORD) MSG_FATAL_token_identifier_expected (); /* Check bracket (2) */ if (func_input) { wtemp = temp_copy (word); read_word (); if (word_type == INPUT_OPEN) { in_brackets = 1; } else { looked_ahead = 1; } } else { wtemp = word; } /* Look up token */ v = search_var_hash (wtemp, SORT_token); if (v == null) MSG_FATAL_token_not_declared (wtemp); info = get_tok_info (v); rs = info->res; ra = info->args; if (rs == SORT_unknown) MSG_FATAL_token_not_declared (wtemp); if (is_high (rs)) { high_sort *h = high_sorts + high_no (rs); rs = h->res; ra = find_decode_string (h); } if (rs != s) MSG_FATAL_token_returns_wrong_sort (wtemp, sort_name (rs), sort_name (s)); adjust_token (v); /* Decode arguments */ p->son = new_node (); p->son->cons = v; if (ra) p->son->son = read_node (ra); /* Check end */ if (in_brackets) { read_word (); if (word_type != INPUT_CLOSE) { MSG_end_of_token_construct_expected (v->name); looked_ahead = 1; } } else { if (p->son->son) { MSG_token_construct_should_be_in_brackets (v->name); } } if (do_check) IGNORE set_token_args (info->pars, p->son->son, 0); return; } /* * READ A TOKEN NAME * * This routine reads a token name (as opposed to a token application). * The token should have sort s. */ static node * read_token_name(sortname s) { node *p; boolean ok = 0; construct *v; high_sort *h; tok_info *info; /* Read token identifier */ read_word (); if (word_type != INPUT_WORD) MSG_FATAL_token_identifier_expected (); /* Look up token */ v = search_var_hash (word, SORT_token); if (v == null) MSG_FATAL_token_not_declared (word); info = get_tok_info (v); /* Check consistency */ h = high_sorts + high_no (s); if (h->res == info->res) { if (info->args == null) { if (h->no_args == 0) ok = 1; } else if (h->no_args) { char *ha = find_decode_string (h); if (streq (info->args, ha)) ok = 1; } } else if (h->id == info->res) { if (info->args == null) ok = 1; } if (!ok) MSG_FATAL_token_has_incorrect_sort (v->name); /* Return the construct */ p = new_node (); p->cons = v; if (!text_output) { p->son = new_node (); p->son->cons = &token_cons; } return (p); } /* * FIND BASIC CONSTRUCT FOR A VARIABLE SORT * * This routine returns the construct for turning an identifier into * an object of sort s. */ static long make_obj(sortname s) { long mk = -1; switch (s) { case SORT_al_tag : mk = ENC_make_al_tag ; break; case SORT_label : mk = ENC_make_label ; break; case SORT_tag : mk = ENC_make_tag ; break; } return (mk); } /* * IS A VARIABLE SORT A USE OR AN INTRODUCTION? * * This flag is true to indicate that the tag (or whatever) being read * is a new one being introduced rather than an old one being used. * The flag intro_tag_var is set to indicate that any tag so introduced * is a variable. The flag intro_visible is set to true whenever the * visible access specifier is read. */ static boolean intro_var = 0; static boolean intro_tag_var = 0; boolean intro_visible = 0; /* * SEARCH FOR A VARIABLE SORT * * This routine initializes, if appropriate, and returns the construct * corresponding to the object named nm of sort s. */ static construct * search_var_sort(char *nm, sortname s) { construct *v = search_var_hash (nm, s); if (intro_var) { if (v == null) { v = make_construct (s); v->name = string_copy (nm); /* Don't add to hash table yet */ if (s == SORT_tag) { tag_info *info = get_tag_info (v); info->var = intro_tag_var; info->vis = intro_visible; intro_visible = 0; } } else { MSG_FATAL_object_already_in_scope (sort_name (s), nm); } } else { if (v == null) { if (!dont_check) MSG_object_not_in_scope (sort_name (s), nm); v = make_construct (s); v->name = string_copy (nm); IGNORE add_to_var_hash (v, s); } } return (v); } /* * READ A VARIABLE SORT * * An identifier representing a construct of sort s is read. */ node * read_var_sort(sortname s) { node *p; construct *v; read_word (); if (word_type != INPUT_WORD) MSG_FATAL_sort_identifier_expected (sort_name (s)); v = search_var_sort (word, s); p = new_node (); p->cons = v; return (p); } /* * READ A SEQUENCE EXPRESSION * * A sequence expression is read. This is tricky because it is a list * of exps followed by an exp, which may be read as a list of exps. */ void read_seq_node(node *p) { node *q = read_node ("*[x]?[x]"); if (q->bro->son) { node *r = q->bro->son; q->bro = r; p->son = q; return; } q->bro = null; if (q->cons->encoding == 0) { MSG_exp_expected (); return; } (q->cons->encoding)--; p->son = q; q = q->son; if (q->bro == null) { p->son->son = null; p->son->bro = q; } else { while (q->bro->bro) q = q->bro; p->son->bro = q->bro; q->bro = null; } return; } /* * READ SORT INDICATED BY A SINGLE DECODE LETTER * * An object with sort given by the decode letter str is read. If the next * object is not of this sort then either an error is flagged (if strict * is true) or null is returned. */ static node * read_node_aux(char *str, int strict) { sortname s; char *wtemp; node *p, *ps; construct *cons; read_func fn = null; boolean in_brackets = 0; /* Find the corresponding sort name */ if (str [1] == '&') { /* Introduced variable */ intro_var = 1; intro_tag_var = 1; } else if (str [1] == '^') { /* Introduced identity */ intro_var = 1; intro_tag_var = 0; } switch (str [0]) { case 'i' : { s = SORT_tdfint; break; } case 'j' : { s = SORT_tdfbool; break; } case '$' : { s = SORT_tdfstring; break; } case 'F' : { s = SORT_unknown; break; } default : { s = find_sort (str [0]); fn = sort_read [s]; break; } } /* Read the next word */ read_word (); /* Check for blanks */ if (word_type == INPUT_BLANK && !strict) { word_type = INPUT_BLANK_FIRST; return (null); } /* Check for bars */ if (word_type == INPUT_BAR && !strict) { word_type = INPUT_BAR_FIRST; return (null); } /* Deal with strings */ if (s == SORT_tdfstring) { if (word_type == INPUT_STRING) { p = new_node (); p->cons = new_construct (); p->cons->sortnum = SORT_tdfstring; p->cons->encoding = word_length; p->cons->name = string_ncopy (word, (int) word_length); p->cons->next = null; return (p); } else { boolean is_multibyte = 0; if (func_input) { if (word_type == INPUT_WORD) { if (streq (word, MAKE_STRING)) { read_word (); if (word_type == INPUT_OPEN) is_multibyte = 1; } } } else { if (word_type == INPUT_OPEN) { read_word (); if (word_type == INPUT_WORD) { if (streq (word, MAKE_STRING)) is_multibyte = 1; } } } if (is_multibyte) { if (!allow_multibyte) MSG_FATAL_multibyte_strings_not_allowed_here (); p = new_node (); p->cons = &string_cons; p->son = read_node ("i*[i]"); read_word (); if (word_type != INPUT_CLOSE) { MSG_end_of_multibyte_string_expected (); } return (p); } } if (strict) MSG_FATAL_string_expected (); return (null); } /* Deal with numbers */ if (word_type == INPUT_NUMBER) { boolean negate = 0; if (*word == '-') { word++; negate = 1; } p = new_node (); p->cons = new_construct (); if (fits_ulong (word, 1)) { p->cons->sortnum = SORT_small_tdfint; p->cons->encoding = (long) octal_to_ulong (word); } else { p->cons->sortnum = SORT_tdfint; p->cons->name = string_copy(word); } switch (s) { case SORT_tdfint : { if (negate) { MSG_negative_nat (); } return (p); } case SORT_tdfbool : { node *q = new_node (); q->cons = (negate ? &true_cons : &false_cons); q->bro = p; return (q); } case SORT_nat : { node *q = new_node (); if (negate) { MSG_negative_nat (); } q->cons = cons_no (SORT_nat, ENC_make_nat); q->son = p; return (q); } case SORT_signed_nat : { node *q = new_node (); q->cons = cons_no (SORT_signed_nat, ENC_make_signed_nat); q->son = new_node (); q->son->cons = (negate ? &true_cons : &false_cons); q->son->bro = p; return (q); } default : { if (strict) MSG_FATAL_name_expected (sort_name (s)); return (null); } } } /* Deal with strings */ if (word_type == INPUT_STRING) { if (s == SORT_string) { node *q; p = new_node (); p->cons = new_construct (); p->cons->sortnum = SORT_tdfstring; p->cons->encoding = word_length; p->cons->name = string_ncopy (word, (int) word_length); p->cons->next = null; q = new_node (); q->cons = cons_no (SORT_string, ENC_make_string); q->son = p; return (q); } } /* That was the last chance for numbers */ if (fn == null) { if (strict) MSG_FATAL_number_expected (); return (null); } /* Check for brackets (1) */ if (!func_input && word_type == INPUT_OPEN) { in_brackets = 1; read_word (); } /* The next word should be the identifier */ if (word_type != INPUT_WORD) { if (strict) MSG_FATAL_name_expected (sort_name (s)); return (null); } /* Check for brackets (2) */ if (func_input) { wtemp = temp_copy (word); read_word (); if (word_type == INPUT_OPEN) { in_brackets = 1; } else { looked_ahead = 1; } } else { wtemp = word; } if (s == SORT_string && streq (word, MAKE_STRING)) { node *q; p = new_node (); p->cons = &string_cons; p->son = read_node ("i*[i]"); read_word (); if (word_type != INPUT_CLOSE) { MSG_end_of_multibyte_string_expected (); } q = new_node (); q->cons = cons_no (SORT_string, ENC_make_string); q->son = p; return (q); } /* Look up construct */ cons = search_cons_hash (wtemp, s); if (cons) { p = fn (cons->encoding); ps = p->son; } else { boolean do_check_tag = 0; if (!in_brackets && (s == SORT_al_tag || s == SORT_label || s == SORT_tag)) { do_check_tag = 1; } /* Look up token */ cons = search_var_hash (wtemp, SORT_token); if (cons) { tok_info *info = get_tok_info (cons); sortname rs = info->res; char *ra = info->args; if (rs == SORT_unknown) { if (do_check_tag) goto check_lab; MSG_FATAL_token_not_declared (wtemp); } if (is_high (rs)) { high_sort *h = high_sorts + high_no (rs); rs = h->res; ra = find_decode_string (h); } if (rs != s) { if (do_check_tag) goto check_lab; if (!strict) return (null); MSG_FATAL_token_returns_wrong_sort (wtemp, sort_name (rs), sort_name (s)); } adjust_token (cons); p = new_node (); p->cons = cons_no (s, sort_tokens [s]); p->son = new_node (); p->son->cons = cons; if (ra) p->son->son = read_node (ra); ps = p->son->son; if (do_check) { IGNORE set_token_args (info->pars, p->son->son, 0); if (s == SORT_exp) check_exp (p); } } else { /* Look up label, tag etc */ if (do_check_tag) { check_lab : cons = search_var_sort (wtemp, s); } if (cons) { long mk = make_obj (s); p = new_node (); p->cons = cons_no (s, mk); p->son = new_node (); p->son->cons = cons; ps = null; } else { if (strict) { MSG_FATAL_illegal_sort (sort_name (s), wtemp); } return (null); } } } /* Check end of construct */ if (in_brackets) { read_word (); if (word_type != INPUT_CLOSE) { MSG_end_of_construct_expected (cons->name); looked_ahead = 1; } } else { if (ps) { MSG_construct_should_be_in_brackets (cons->name); } } return (p); } /* * BRING VARIABLES INTO AND OUT OF SCOPE * * The tags, labels etc introduced in p are brought into (if end is * false) or out of (if end is true) scope. This only works because * all the constructs which introduce these variables are of a fairly * simple form. */ static void adjust_scope(node *p, int end) { node *p0 = p; while (p) { construct *v = p->cons; sortname s = v->sortnum; switch (s) { case SORT_repeat : case SORT_option : { /* Scan repeated and optional arguments */ if (p->son) adjust_scope (p->son, end); break; } case SORT_al_tag : case SORT_label : case SORT_tag : { /* Variable found - adjust scope */ if (v->encoding == make_obj (s)) { construct *u = p->son->cons; if (end) { if (s == SORT_tag) { /* Visible tags aren't removed */ tag_info *info = get_tag_info (u); if (info->vis) break; } remove_var_hash (u->name, s); } else { if (add_to_var_hash (u, s)) { MSG_FATAL_object_already_in_scope ( sort_name (s), u->name); } if (do_check && s == SORT_tag) { /* Fill in shape of tag */ node *ts; node *p1 = p->bro; tag_info *info = get_tag_info (u); if (p1 && p1->cons->sortnum == SORT_exp) { /* identity and variable have "t&x" */ ts = p1->shape; } else if (p0->cons->sortnum == SORT_shape) { /* make_proc etc have "S?[u]t&" */ ts = copy_node (p0); } else { /* don't know about other constructs */ ts = null; } /* Declaration = ?[u]?[X]S from 4.0 */ info->dec = new_node (); info->dec->cons = &false_cons; info->dec->bro = new_node (); info->dec->bro->cons = &false_cons; info->dec->bro->bro = ts; } } } break; } } p = p->bro; } return; } /* * CHECK FOR COMMA OR CLOSE BRACKET * * The next word should be a comma, which is stepped over, or a close * bracket. */ static void check_comma(void) { read_word (); if (word_type == INPUT_COMMA) { read_word (); looked_ahead = 1; if (word_type == INPUT_CLOSE) { MSG_badly_placed_comma (); } return; } if (word_type != INPUT_CLOSE) { MSG_comma_or_close_bracket_expected (); } looked_ahead = 1; return; } /* * READ SORTS GIVEN BY A STRING OF DECODE LETTERS * * A node corresponding to the decode string str is read from the * input file. */ node * read_node(char *str) { char c; position store; node *p, *q = null, *qe = null; while (c = *str, (c != 0 && c != ']')) { switch (c) { case '{' : { /* Start of scope */ adjust_scope (q, 0); p = null; break; } case '}' : { /* End of scope */ adjust_scope (q, 1); p = null; break; } case '[' : case '|' : case '&' : case '^' : { /* Ignore these cases */ p = null; break; } case '*' : case '!' : { /* Repeats */ char cr; char *sr; long n = 0; int opt = 0; node *pe = null, *pr; str += 2; cr = *str; sr = str; if (cr == '?') { /* Allow for lists of options */ opt = 1; str += 2; cr = *str; sr = skip_text (str); } if (cr == '*' || cr == '!') { MSG_FATAL_lists_of_lists_not_implemented (); } else if (cr == '?') { MSG_FATAL_lists_of_options_not_implemented (); } p = new_node (); p->cons = new_construct (); p->cons->sortnum = SORT_repeat; do { store_position (&store); pr = read_node_aux (sr, 0); if (pr || (opt && word_type == INPUT_BLANK_FIRST)) { if (func_input) check_comma (); if (opt) { /* Allow for optionals */ node *pt = pr; if (pt && str [1] != ']') { pt->bro = read_node (str + 1); } pr = new_node (); pr->cons = &optional_cons; pr->son = pt; } if (sr [1] != ']') { pr->bro = read_node (sr + 1); } if (pe == null) { p->son = pr; } else { pe->bro = pr; } pe = pr; while (pe->bro) pe = pe->bro; n++; } else { if (word_type == INPUT_BAR_FIRST) { if (func_input) check_comma (); } else if (c == '!' && n == 0 && word_type == INPUT_BLANK_FIRST) { if (func_input) check_comma (); } else { set_position (&store); } } } while (pr); p->cons->encoding = n; if (opt) sr++; str = skip_text (sr); if (c == '!') { /* Optional repeats */ node *pt = p; p = new_node (); p->cons = &optional_cons; if (n) p->son = pt; } break; } case '?' : { /* Optionals */ node *po; char co; str += 2; co = *str; if (co == '*' || co == '!') { MSG_FATAL_optional_lists_not_implemented (); } else if (co == '?') { MSG_FATAL_optional_options_not_implemented (); } intro_visible = 0; store_position (&store); po = read_node_aux (str, 0); if (po) { if (func_input) check_comma (); if (str [1] != ']') { po->bro = read_node (str + 1); } } else { if (word_type == INPUT_BLANK_FIRST) { if (func_input) check_comma (); } else { set_position (&store); } } p = new_node (); p->cons = &optional_cons; p->son = po; str = skip_text (str); break; } case '@' : { /* Conditionals */ str += 2; p = new_node (); p->cons = &bytestream_cons; p->son = read_node (str); str = skip_text (str); break; } case 'T' : { /* Tokens */ sortname sn; str = find_sortname (str, &sn); p = read_token_name (sn); break; } default : { /* Simple sort */ p = read_node_aux (str, 1); if (func_input) check_comma (); break; } } if (p) { if (qe == null) { q = p; } else { qe->bro = p; } qe = p; while (qe->bro) qe = qe->bro; intro_var = 0; } str++; } return (q); }