/*
* Copyright (c) 2002, The Tendra Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*
* Crown Copyright (c) 1997, 1998
*
* This TenDRA(r) Computer Program is subject to Copyright
* owned by the United Kingdom Secretary of State for Defence
* acting through the Defence Evaluation and Research Agency
* (DERA). It is made available to Recipients with a
* royalty-free licence for its use, reproduction, transfer
* to other parties and amendment for any purpose not excluding
* product development provided that any such use et cetera
* shall be deemed to be acceptance of the following conditions:-
*
* (1) Its Recipients shall ensure that this Notice is
* reproduced upon any copies or amended versions of it;
*
* (2) Any amended version of it shall be clearly marked to
* show both the nature of and the organisation responsible
* for the relevant amendment or amendments;
*
* (3) Its onward transfer from a recipient to another
* party shall be deemed to be that party's acceptance of
* these conditions;
*
* (4) DERA gives no warranty or assurance as to its
* quality or suitability for any purpose and DERA accepts
* no liability whatsoever in relation to any use to which
* it may be put.
*
* $TenDRA: tendra/src/producers/common/parse/literal.c,v 1.17 2005/10/26 21:24:36 stefanf Exp $
*/
#include "config.h"
#include "producer.h"
#include
#include "cstring.h"
#include "fmm.h"
#include "msgcat.h"
#include "c_types.h"
#include "exp_ops.h"
#include "flt_ops.h"
#include "id_ops.h"
#include "itype_ops.h"
#include "nat_ops.h"
#include "str_ops.h"
#include "type_ops.h"
#include "error.h"
#include "catalog.h"
#include "option.h"
#include "basetype.h"
#include "cast.h"
#include "char.h"
#include "chktype.h"
#include "constant.h"
#include "convert.h"
#include "dump.h"
#include "exception.h"
#include "expression.h"
#include "hash.h"
#include "inttype.h"
#include "lex.h"
#include "literal.h"
#include "preproc.h"
#include "syntax.h"
#include "tok.h"
#include "token.h"
#include "ustring.h"
/*
* SPECIAL TABLE VALUES
*
* These macros are used in the tables of digits and escape sequences
* to indicate special values.
*/
#define NONE 0xff
#define OCTE 0xfe
#define HEXE 0xfd
#define UNI4 0xfc
#define UNI8 0xfb
/*
* TABLE OF DIGITS
*
* This table gives the mapping of characters to digits. The default
* table assumes the ASCII character set, for other codesets it needs
* to be rewritten. The valid digits are 0-9, A-Z (which evaluate to
* 10-35) and a-z (which evaluate to 10-35). Invalid digits are
* indicated by NONE.
*/
unsigned char digit_values [NO_CHAR + 1] = {
#define CHAR_DATA(A, B, C, D) (B),
#include "char.h"
#undef CHAR_DATA
NONE /* dummy */
};
/*
* TABLE OF ESCAPE SEQUENCES
*
* This table gives the mapping of characters to escape sequences. The
* default table assumes the ASCII character set, for other codesets it
* needs to be rewritten. The valid escape sequences are \', \", \?,
* \\, \a, \b, \f, \n, \r, \t and \v. Octal escape sequences are
* indicated by OCTE, hexadecimal escape sequences by HEXE, universal
* character names by UNI4 or UNI8, and illegal escape sequences by
* NONE.
*/
unsigned char escape_sequences [NO_CHAR + 1] = {
#define CHAR_DATA(A, B, C, D) (C),
#include "char.h"
#undef CHAR_DATA
NONE /* dummy */
};
/*
* SET AN ESCAPE SEQUENCE
*
* This routine sets the character escape value for the character
* literal expression a to be the character literal b, or an illegal
* escape if b is the null expression.
*/
void
set_escape(EXP a, EXP b)
{
int c = get_char_value (a);
int e = NONE;
if (!IS_NULL_exp (b)) {
e = get_char_value (b);
if (e == char_illegal) e = NONE;
}
if (c >= 0 && c < NO_CHAR) {
escape_sequences [c] = (unsigned char) e;
}
return;
}
/*
* CHECK A STRING OF DIGITS
*
* This routine scans the string s for valid digits for the given base.
* It returns a pointer to the first character which is not a valid
* digit.
*/
static string
check_digits(string s, unsigned base)
{
unsigned b;
character c;
while (c = *s, c != 0) {
#if FS_EXTENDED_CHAR
if (IS_EXTENDED (c)) break;
#endif
b = (unsigned) digit_values [c];
if (b >= base) break;
s++;
}
return (s);
}
/*
* EVALUATE A STRING OF DIGITS
*
* This routine evaluates the string of digits starting with s and
* ending with t using the given base (which will be at most 16). It
* is assumed that all of these digits are in the correct range.
*/
static NAT
eval_digits(string s, string t, unsigned base)
{
NAT n;
int m = 0;
string r = s;
unsigned long v = 0;
unsigned long b = (unsigned long) base;
while (r != t && m < 8) {
/* Evaluate first few digits */
unsigned long d = (unsigned long) digit_values [*r];
v = b * v + d;
m++;
r++;
}
n = make_nat_value (v);
while (r != t) {
/* Evaluate further digits */
unsigned d = (unsigned) digit_values [*r];
n = make_nat_literal (n, base, d);
r++;
}
return (n);
}
/*
* EVALUATE A STRING OF DIGITS
*
* This routine is the same as eval_digits except that it assumes that
* the result fits inside an unsigned long, and reports an error
* otherwise.
*/
static unsigned long
eval_char_digits(string s, string t, unsigned base)
{
string r;
int overflow = 0;
unsigned long n = 0;
unsigned long b = (unsigned long) base;
for (r = s; r != t; r++) {
unsigned long m = n;
n = b * n + (unsigned long) digit_values [*r];
if (n < m) overflow = 1;
}
if (overflow) report (crt_loc, ERR_lex_ccon_large ());
return (n);
}
/*
* EVALUATE A LINE NUMBER
*
* This routine evaluates the sequence of decimal digits s as a line
* number in a #line, or similar, preprocessing directive. Any errors
* arising are indicated using err. This is a bit pattern consisting
* of 2 if s is not a simple string of decimal digits, and 1 if its
* value exceeds the currently set limit.
*/
unsigned long
eval_line_digits(string s, unsigned *err)
{
string r;
unsigned e = 0;
unsigned long n = 0;
string t = check_digits (s, (unsigned) 10);
if (*t) e = 2;
for (r = s; r != t; r++) {
int d = digit_values [*r];
if (n > (ULONG_MAX - d) / 10) {
e |= 1;
break;
}
n = 10 * n + d;
}
if (n > option_value (OPT_VAL_hash_line_number))
e |= 1;
*err = e;
return (n);
}
/*
* STRING HASH TABLE
*
* This variable gives the hash table used in shared string literals.
*/
static STRING *string_hash_table = NULL;
#define HASH_STRING_SIZE ((unsigned long) 256)
/*
* STRING AND CHARACTER LITERAL TYPES
*
* The type of a simple character literal is char in C++, but int in C.
* The variable type_char_lit is used to hold the appropriate result
* type. Other string and character literals have fixed types, however
* for convenience variables are used to identify them.
*/
static TYPE type_char_lit;
static TYPE type_mchar_lit;
static TYPE type_wchar_lit;
static TYPE type_string_lit;
static TYPE type_wstring_lit;
CV_SPEC cv_string = cv_none;
/*
* SET THE CHARACTER LITERAL TYPE
*
* This routine sets the type of a character literal to be t. t must be
* an integral type. Note that only the representation type is set to t,
* the semantic type is always char.
*/
void
set_char_lit(TYPE t)
{
if (IS_type_integer (t)) {
INT_TYPE r = DEREF_itype (type_integer_rep (t));
INT_TYPE s = DEREF_itype (type_integer_rep (type_char));
type_char_lit = make_itype (r, s);
} else {
report (preproc_loc, ERR_pragma_char_lit (t));
}
return;
}
/*
* TABLE OF INTEGER LITERAL SPECIFICATIONS
*
* The type LITERAL_INFO is used to represent an item in an integer
* literal type specification. The table int_lit_spec holds the
* specifications for the various combinations of base and suffix.
*/
typedef struct lit_info_tag {
int tag;
TYPE type;
NAT bound;
IDENTIFIER tok;
int tok_no;
int opt;
struct lit_info_tag *next;
} LITERAL_INFO;
static LITERAL_INFO *int_lit_spec [BASE_NO] [SUFFIX_NO] = {
{ NULL, NULL, NULL, NULL, NULL, NULL },
{ NULL, NULL, NULL, NULL, NULL, NULL },
{ NULL, NULL, NULL, NULL, NULL, NULL }
};
static LITERAL_INFO *crt_int_lit = NULL;
static LITERAL_INFO **ptr_int_lit = NULL;
/*
* TABLE OF BUILT-IN INTEGER LITERAL SPECIFICATIONS
*
* This table gives the possible types and built-in tokens for the
* various base and suffix combinations.
*/
static struct {
unsigned char type [6];
int tok;
LIST (TYPE) cases;
} int_lit_tok [BASE_NO] [SUFFIX_NO] = {
{
{ { 2, 0, 2, 2, 1, 1 }, TOK_lit_int, NULL_list (TYPE) },
{ { 0, 2, 0, 2, 0, 1 }, TOK_lit_unsigned, NULL_list (TYPE) },
{ { 0, 0, 2, 2, 1, 1 }, TOK_lit_long, NULL_list (TYPE) },
{ { 0, 0, 0, 2, 0, 1 }, TOK_lit_ulong, NULL_list (TYPE) },
{ { 0, 0, 0, 0, 2, 2 }, TOK_lit_llong, NULL_list (TYPE) },
{ { 0, 0, 0, 0, 0, 2 }, TOK_lit_ullong, NULL_list (TYPE) }
},
{
{ { 2, 2, 2, 2, 1, 1 }, TOK_lit_hex, NULL_list (TYPE) },
{ { 0, 2, 0, 2, 0, 1 }, TOK_lit_unsigned, NULL_list (TYPE) },
{ { 0, 0, 2, 2, 1, 1 }, TOK_lit_long, NULL_list (TYPE) },
{ { 0, 0, 0, 2, 0, 1 }, TOK_lit_ulong, NULL_list (TYPE) },
{ { 0, 0, 0, 0, 2, 2 }, TOK_lit_llong, NULL_list (TYPE) },
{ { 0, 0, 0, 0, 0, 2 }, TOK_lit_ullong, NULL_list (TYPE) }
},
{
{ { 2, 2, 2, 2, 1, 1 }, TOK_lit_hex, NULL_list (TYPE) },
{ { 0, 2, 0, 2, 0, 1 }, TOK_lit_unsigned, NULL_list (TYPE) },
{ { 0, 0, 2, 2, 1, 1 }, TOK_lit_long, NULL_list (TYPE) },
{ { 0, 0, 0, 2, 0, 1 }, TOK_lit_ulong, NULL_list (TYPE) },
{ { 0, 0, 0, 0, 2, 2 }, TOK_lit_llong, NULL_list (TYPE) },
{ { 0, 0, 0, 0, 0, 2 }, TOK_lit_ullong, NULL_list (TYPE) }
}
};
/*
* INITIALISE TABLE OF INTEGER LITERAL TYPES
*
* This routine initialises the string and character literal types and
* the table int_lit_info. The initial values for the table are given
* by the following lists of types:
*
* decimal: (int, long, unsigned long),
* octal/hex: (int, unsigned, long, unsigned long),
* U suffix: (unsigned, unsigned long),
* L suffix: (long, unsigned long),
* UL suffix: (unsigned long),
* LL suffix: (long long, unsigned long long),
* ULL suffix: (unsigned long long).
*
* Each integer literal is checked against each type in the list
* indicated by the form of the literal. If it fits into a type then
* that is the type of the literal. If it does not fit into any type
* then an error is raised. If whether it fits into a particular type
* is target dependent then a literal integer type, giving the literal
* value and a list of possible types, is constructed to express the
* result type.
*
* The string and character types are:
*
* character: char,
* multi-character: int,
* wide character: wchar_t,
* string: const char [n],
* wide string: const wchar_t [n].
*
* Variants are that characters have type int in C and that string
* literals are not const in pre-ISO C++ and C.
*/
void
init_literal(void)
{
int b, s;
BUILTIN_TYPE n;
OPTION opt = option (OPT_int_overflow);
ASSERT (!IS_NULL_type (type_char));
/* String and character literal types */
type_mchar_lit = type_sint;
type_wchar_lit = type_wchar_t;
type_string_lit = type_char;
type_wstring_lit = type_wchar_t;
#if LANGUAGE_CPP
set_char_lit (type_char);
set_string_qual (cv_const);
#else
set_char_lit (type_sint);
set_string_qual (cv_none);
#endif
/* Set up type lists */
for (b = 0; b < BASE_NO; b++) {
for (s = 0; s < SUFFIX_NO; s++) {
LIST (TYPE) p = NULL_list (TYPE);
begin_literal (b, s);
for (n = 0; n < 6; n++) {
if (int_lit_tok [b] [s].type [n] == 2) {
TYPE t = type_builtin [ntype_sint + n];
add_range_literal (NULL_exp, 1);
add_type_literal (t);
CONS_type (t, p, p);
}
}
add_range_literal (NULL_exp, 0);
add_token_literal (NULL_id, (unsigned) opt);
p = REVERSE_list (p);
int_lit_tok [b] [s].cases = uniq_type_set (p);
}
}
/* Set up string hash table */
if (string_hash_table == NULL) {
unsigned long i;
STRING *q = xmalloc_nof (STRING, HASH_STRING_SIZE);
for (i = 0; i < HASH_STRING_SIZE; i++) q [i] = NULL_str;
string_hash_table = q;
}
return;
}
/*
* SET THE CV-QUALIFIERS FOR A STRING LITERAL
*
* This routine sets the string and wide string literal types to be
* cv-qualified.
*/
void
set_string_qual(CV_SPEC cv)
{
type_string_lit = qualify_type (type_string_lit, cv, 0);
type_wstring_lit = qualify_type (type_wstring_lit, cv, 0);
cv_string = cv;
return;
}
/*
* BEGIN A LITERAL SPECIFICATION DEFINITION
*
* This routine is called to begin the specification of the integer
* literals of the given base and suffix.
*/
void
begin_literal(int base, int suff)
{
LITERAL_INFO **p = &(int_lit_spec [base] [suff]);
*p = NULL;
ptr_int_lit = p;
crt_int_lit = NULL;
return;
}
/*
* ADD A BOUND TO A LITERAL SPECIFICATION
*
* This routine is used to specify a bound in the current literal
* specification. If n is 0 then the bound matches all values, if it
* is 1 then the bound matches all the values in the following type,
* and if it is 2 then the bound matches all values less than or equal
* to the integer literal expression e.
*/
void
add_range_literal(EXP e, int n)
{
LITERAL_INFO *p = xmalloc (sizeof(*p));
p->tag = n;
if (!IS_NULL_exp (e) && IS_exp_int_lit (e)) {
p->bound = DEREF_nat (exp_int_lit_nat (e));
} else {
p->bound = small_nat [0];
}
p->type = NULL_type;
p->tok = NULL_id;
p->tok_no = -1;
p->opt = OPT_none;
p->next = NULL;
*ptr_int_lit = p;
crt_int_lit = p;
ptr_int_lit = &(p->next);
return;
}
/*
* ADD A TYPE TO A LITERAL SPECIFICATION
*
* This routine specifies the type t for all values under the current
* bound in the current literal specification.
*/
void
add_type_literal(TYPE t)
{
NAT n;
LITERAL_INFO *p = crt_int_lit;
if (IS_type_integer (t)) {
if (!is_arg_promote (t)) {
/* Type should promote to itself */
report (preproc_loc, ERR_pragma_lit_type (t));
t = promote_type (t);
}
} else {
/* Type should be integral */
if (!IS_type_error (t)) {
report (preproc_loc, ERR_pragma_lit_type (t));
}
t = type_ulong;
}
p->type = qualify_type (t, cv_none, 0);
n = p->bound;
if (p->tag == 2) {
if (check_nat_range (t, n) != NAT_FIT) {
/* Given bound should fit into type */
report (preproc_loc, ERR_pragma_lit_range (n, t));
n = max_type_value (t, 0);
}
} else {
n = max_type_value (t, 0);
}
p->bound = n;
return;
}
/*
* ADD A TOKEN TO A LITERAL SPECIFICATION
*
* This routine specifies that the token id should be used to calculate
* the type for all values under the current bound in the current
* literal specification. An error with severity sev is reported.
*/
void
add_token_literal(IDENTIFIER id, unsigned sev)
{
int n = -1;
LITERAL_INFO *p = crt_int_lit;
if (!IS_NULL_id (id)) {
id = resolve_token (id, "ZZ", 0);
if (!IS_NULL_id (id)) n = builtin_token (id);
}
if (p->tag == 1) {
report (preproc_loc, ERR_pragma_lit_question ());
p->tag = 3;
}
p->tok = id;
p->tok_no = n;
switch (sev) {
case OPTION_ON : p->opt = OPT_error; break;
case OPTION_WARN : p->opt = OPT_warning; break;
default : p->opt = OPT_none; break;
}
return;
}
/*
* FIND AN INTEGER LITERAL TYPE
*
* This routine finds the type of the integer constant lit specified
* with base base and suffix suff. num gives the text used to specify
* the constant for the purposes of error reporting. fit is set to
* true if lit definitely fits into the result.
*/
TYPE
find_literal_type(NAT lit, int base, int suff, string num, int *fit)
{
TYPE t;
int tok;
INT_TYPE it;
int big = 0;
int have_tok = 0;
int opt = OPT_error;
NAT n = small_nat [0];
IDENTIFIER tid = NULL_id;
LIST (TYPE) qt = NULL_list (TYPE);
LITERAL_INFO *pt = int_lit_spec [base] [suff];
/* Deal with calculated literals */
switch (TAG_nat (lit)) {
case nat_neg_tag : {
lit = DEREF_nat (nat_neg_arg (lit));
t = find_literal_type (lit, base, suff, num, fit);
return (t);
}
case nat_token_tag : {
t = type_sint;
*fit = 1;
return (t);
}
case nat_calc_tag : {
EXP e = DEREF_exp (nat_calc_value (lit));
t = DEREF_type (exp_type (e));
*fit = 1;
return (t);
}
}
/* Deal with simple literals */
while (pt != NULL) {
int ch = 4;
TYPE s = pt->type;
switch (pt->tag) {
case 0 : {
TYPE r = s;
if (IS_NULL_type (r)) r = type_ulong;
ch = check_nat_range (r, lit);
if (ch == NAT_FIT) *fit = 1;
if (ch >= NAT_NEVERFIT_SIGNED) big = ch;
if (big) n = max_type_value (NULL_type, 0);
ch = big;
break;
}
case 1 : {
n = pt->bound;
ch = check_nat_range (s, lit);
if (ch == NAT_FIT) *fit = 1;
break;
}
case 2 : {
n = pt->bound;
if (compare_nat (n, lit) >= 0) {
if (!IS_NULL_type (s)) *fit = 1;
ch = 0;
}
break;
}
}
if (ch == NAT_FIT) {
/* lit definitely fits into bound */
if (!IS_NULL_type (s) && IS_NULL_list (qt)) {
/* No previous fit */
return (s);
}
}
if (ch <= NAT_MAYFIT_UNSIGNED) {
/* lit may fit into bound */
if (!IS_NULL_type (s)) {
if (have_tok == 0) {
INT_TYPE is = DEREF_itype (type_integer_rep (s));
LIST (TYPE) st = DEREF_list (itype_cases (is));
qt = union_type_set (qt, st);
if (ch == NAT_FIT) have_tok = 1;
}
} else {
if (have_tok == 0 && !IS_NULL_id (pt->tok)) {
DESTROY_list (qt, SIZE_type);
tok = int_lit_tok [base] [suff].tok;
if (pt->tok_no == tok) {
qt = int_lit_tok [base] [suff].cases;
} else if (suff < SUFFIX_LL) {
qt = all_prom_types;
} else {
qt = all_llong_types;
}
have_tok = 2;
}
break;
}
}
if (ch >= NAT_NEVERFIT_SIGNED) big = ch;
pt = pt->next;
}
/* Tokenised result */
if (have_tok != 2) {
/* Find list of possible types */
if (IS_NULL_list (qt)) {
qt = int_lit_tok [base] [suff].cases;
} else {
qt = REVERSE_list (qt);
qt = uniq_type_set (qt);
}
}
if (pt) {
/* Get token information from table */
tid = pt->tok;
opt = pt->opt;
}
if (num && !(*fit)) {
/* Report error if necessary */
ERROR err = ERR_lex_icon_large (num, n);
err = set_severity (err, opt, 0);
if (!IS_NULL_err (err)) report (crt_loc, err);
}
if (LENGTH_list (qt) == 1) {
/* Only one possible case */
t = DEREF_type (HEAD_list (qt));
DESTROY_list (qt, SIZE_type);
return (t);
}
tok = int_lit_tok [base] [suff].tok;
MAKE_itype_literal (NULL_type, qt, lit, tok, base, suff, tid, it);
t = promote_itype (it, it);
return (t);
}
/*
* SHIFT A HEXADECIMAL STRING
*
* This routine shifts the hexadecimal string s to the right by n bits.
* The value n must be less than 4. The vacant bits at s' leftmost
* position are filled with shift_in. If shift_out a null pointer, an
* additional character that holds the shifted out bits is added.
* Otherwise the bits are stored in *shift_out.
*/
static string
shift_hex_string(string s, unsigned n, unsigned shift_in, unsigned *shift_out)
{
static const char hex_values[] = "0123456789abcdef";
unsigned c, mask, v, x;
string out, ret;
ASSERT (n <= 3);
if (n == 0) return ustring_copy (s);
out = ret = ustring_alloc (ustrlen (s) + (shift_out != NULL ? 1 : 2));
mask = (1u << n) - 1;
c = shift_in;
while ((v = digit_values[*s++]) < 16) {
x = (v >> n) | c;
c = (v & mask) << (4 - n);
*out++ = hex_values[x];
}
if (shift_out != NULL) {
*shift_out = c;
} else {
*out++ = hex_values[c];
}
*out++ = '\0';
return ret;
}
/*
* ANALYSE AN INTEGER OR FLOATING LITERAL
*
* This routine analyses the integer or floating literal given by the
* string str, constructing the corresponding expression. The location
* given by ptok is assigned with lex_integer_Hexp or lex_floating_Hexp
* depending on the form of the literal. Note that str can be an area
* of read-only memory for integer literals, but not for floating
* literals.
*/
EXP
make_literal_exp(string str, int *ptok, int force)
{
EXP e;
string r;
int err = 0;
int flt = 0;
int pexp;
string s = str;
unsigned base = 10;
string int_part, frac_part;
string dot_posn = NULL;
string exp_posn = NULL;
int form = BASE_DECIMAL;
/* Check small literals */
character c1 = s [0];
character c2 = 0;
if (c1) c2 = s [1];
if (c2 == 0 && (c1 >= char_zero && c1 <= char_nine)) {
unsigned etag = exp_int_lit_tag;
int n = (int) digit_values [c1];
NAT lit = small_nat [n];
if (IS_NULL_nat (lit)) lit = make_small_nat (n);
if (n == 0) etag = exp_null_tag;
MAKE_exp_int_lit (type_sint, lit, etag, e);
*ptok = lex_integer_Hexp;
return (e);
}
if (c1 == char_zero && (c2 == char_x || c2 == char_X)) {
/* Hexadecimal literal */
base = 16;
form = BASE_HEXADECIMAL;
s += 2;
}
int_part = s;
s = check_digits (int_part, base);
if (s[0] == char_dot) {
dot_posn = s;
/* Fractional component of floating literal */
frac_part = s + 1;
s = check_digits (frac_part, base);
flt = 1;
}
exp_posn = s;
pexp = (s[0] == char_p || s[0] == char_P);
if (pexp || s[0] == char_e || s[0] == char_E) {
/* Exponent component of floating literal */
if (s[1] == char_plus || s[1] == char_minus) s++;
r = s + 1;
s = check_digits (r, 10);
if (s == r) err = 1;
flt = 1;
}
if (c1 == char_zero && base == 10 && !flt) {
/* Octal integer */
base = 8;
form = BASE_OCTAL;
r = check_digits (str, base);
if (r != s) {
/* Digits contain 8 or 9 */
report (crt_loc, ERR_lex_icon_octal (str));
}
}
if (flt) {
/* Floating literals */
int zero;
NAT expon;
character ep;
string suff_posn = s;
unsigned trail_zero = 0;
FLOAT lit = NULL_flt;
TYPE t = type_double;
/* Exponent char and base must match. Exponent must exist for
* hexadecimal floating point literals. */
if ((base == 16) != pexp) {
err = 1;
}
if (base == 16) {
/* No digits */
if (exp_posn - int_part <= 1) {
if (exp_posn == int_part || dot_posn != NULL) err = 1;
}
report (crt_loc, ERR_lex_fcon_hex ());
}
/* Check float suffix */
c1 = s [0];
if (c1 == char_f || c1 == char_F) {
/* Suffix F */
t = type_float;
s++;
c1 = s [0];
} else if (c1 == char_l || c1 == char_L) {
/* Suffix L */
t = type_ldouble;
s++;
c1 = s [0];
}
/* Check for end of number */
if (c1 || err) report (crt_loc, ERR_lex_literal_bad (str));
/* Find number components (involves writing to s) */
while (int_part [0] == char_zero) {
/* Remove initial zeros */
int_part++;
}
if (dot_posn) {
dot_posn [0] = 0;
if (int_part == dot_posn) int_part = small_number [0];
if (frac_part == exp_posn) {
frac_part = small_number [0];
} else {
/* Remove trailing zeros */
string frac_zero = exp_posn - 1;
while (frac_zero [0] == char_zero) {
frac_zero [0] = 0;
frac_zero--;
trail_zero++;
}
if (frac_zero == dot_posn) frac_part = small_number [0];
}
} else {
if (int_part == exp_posn) int_part = small_number [0];
frac_part = small_number [0];
}
ep = exp_posn [0];
exp_posn [0] = 0;
if (ep == char_e || ep == char_E || ep == char_p || ep == char_P) {
/* Evaluate exponent */
r = exp_posn + 1;
c2 = r [0];
if (c2 == char_minus || c2 == char_plus) r++;
expon = eval_digits (r, suff_posn, 10);
if (c2 == char_minus) expon = negate_nat (expon);
zero = is_zero_nat (expon);
} else {
expon = small_nat [0];
zero = 1;
}
if (zero && ustreq (frac_part, small_number [0])) {
int i;
for (i = 0; i < SMALL_FLT_SIZE; i++) {
if (ustreq (int_part, small_number [i])) {
lit = get_float (t, i);
break;
}
}
}
if (base == 16) {
/* A hexadecimal floating point literal represents the value
* q*2^e where q is the mantissa and e the exponent. For TDF's
* make_floating we have to convert that to q'*16^e' because the
* base value is used for the representation of the mantissa and
* the exponent. */
unsigned long n;
unsigned c;
if (is_negative_nat (expon)) {
/* q' = q >> (-e % 4)
* e' = -(-e / 4) */
NAT m = negate_nat (expon);
n = get_nat_value (m);
if (n == EXTENDED_MAX) err = 1;
expon = negate_nat (make_nat_value (n / 4));
n %= 4;
} else {
/* q' = q >> (3 - (e + 3) % 4)
* e' = (e + 3) / 4 */
n = get_nat_value (expon);
if (n == EXTENDED_MAX) err = 1;
expon = make_nat_value ((n + 3) / 4);
n = 3 - ((n + 3) % 4);
}
/* Compute q' */
int_part = shift_hex_string (int_part, n, 0, &c);
frac_part = shift_hex_string (frac_part, n, c, NULL);
} if (IS_NULL_flt (lit)) {
int_part = ustring_copy (int_part);
frac_part = ustring_copy (frac_part);
}
if (trail_zero) {
/* Restore trailing zeros */
r = exp_posn - 1;
do {
r [0] = char_zero;
r--;
trail_zero--;
} while (trail_zero);
}
if (dot_posn) dot_posn [0] = char_dot;
exp_posn [0] = ep;
/* Construct result - type is as per suffix */
if (IS_NULL_flt (lit)) {
MAKE_flt_simple (int_part, frac_part, expon, lit);
COPY_unsigned (flt_simple_base (lit), base);
}
MAKE_exp_float_lit (t, lit, e);
*ptok = lex_floating_Hexp;
} else {
/* Integer literals */
TYPE t;
NAT lit;
int ls = 0;
int us = 0;
int fit = 0;
/* Find integer value */
r = str;
if (form == BASE_HEXADECIMAL) r += 2;
lit = eval_digits (r, s, base);
/* Check integer suffix */
c1 = s [0];
if (c1 == char_u || c1 == char_U) {
us = 1;
s++;
c1 = s [0];
}
if (c1 == char_l || c1 == char_L) {
ls = 1;
if (s [1] == c1 && basetype_info [ntype_sllong].key) {
report (crt_loc, ERR_lex_icon_llong (str));
ls = 2;
s++;
}
s++;
c1 = s [0];
} else {
/* Map 'int' to 'long' in '#if' expressions */
if (in_hash_if_exp) ls = 1;
}
if (us == 0 && (c1 == char_u || c1 == char_U)) {
us = 1;
s++;
c1 = s [0];
}
/* Check for end of number */
if (c1 || err) report (crt_loc, ERR_lex_literal_bad (str));
/* Find literal type */
if (force) {
t = type_ulong;
fit = 1;
} else {
int suff = SUFFIX (us, ls);
t = find_literal_type (lit, form, suff, str, &fit);
}
MAKE_exp_int_lit (t, lit, exp_int_lit_tag, e);
if (!fit) {
/* Force result to be a calculated value */
MAKE_exp_cast (t, CONV_INT_INT, e, e);
MAKE_nat_calc (e, lit);
MAKE_exp_int_lit (t, lit, exp_int_lit_tag, e);
}
*ptok = lex_integer_Hexp;
}
return (e);
}
/*
* IS A FLOATING LITERAL ZERO?
*
* This routine checks whether the floating point literal f is zero.
*/
int
is_zero_float(FLOAT f)
{
string s;
character c;
s = DEREF_string (flt_simple_int_part (f));
while (c = *(s++), c != 0) {
if (c != char_zero) return (0);
}
s = DEREF_string (flt_simple_frac_part (f));
while (c = *(s++), c != 0) {
if (c != char_zero) return (0);
}
return (1);
}
/*
* ARE TWO FLOATING LITERALS EQUAL?
*
* This routine checks whether the floating point literals f and g are
* equal. Note that this is equality of representation rather than
* equality of the underlying numbers.
*/
int
eq_float_lit(FLOAT f, FLOAT g)
{
NAT ef, eg;
ulong nf, ng;
string af, ag;
string bf, bg;
unsigned cf, cg;
if (EQ_flt (f, g)) return (1);
DECONS_flt_simple (nf, af, bf, ef, cf, f);
DECONS_flt_simple (ng, ag, bg, eg, cg, g);
if (!ustreq (af, ag)) return (0);
if (!ustreq (bf, bg)) return (0);
if (cf != cg) return (0);
if (compare_nat (ef, eg) != 0) return (0);
if (nf == LINK_NONE) COPY_ulong (flt_tok (f), ng);
if (ng == LINK_NONE) COPY_ulong (flt_tok (g), nf);
return (1);
}
/*
* DEFAULT ROUNDING MODE
*
* This variable gives the default rounding mode used for converting
* floating point expressions to integers.
*/
RMODE crt_round_mode = rmode_to_zero;
/*
* ROUND A FLOATING POINT LITERAL
*
* This routine rounds the floating point literal f to an integer
* literal by the rounding mode corresponding to mode. The null integer
* literal is returned to indicate a target dependent literal. The
* range of values in which the result is target independent is actually
* rather small - it is given by FLT_DIG.
*/
NAT
round_float_lit(FLOAT f, RMODE mode)
{
NAT res;
unsigned long i, j, n;
unsigned long res_len;
unsigned long pre_len;
unsigned long exp_val;
character result [100];
/* Decompose simple literal */
string int_part = DEREF_string (flt_simple_int_part (f));
string frac_part = DEREF_string (flt_simple_frac_part (f));
unsigned base = DEREF_unsigned (flt_simple_base (f));
NAT expon = DEREF_nat (flt_simple_exponent (f));
/* Find component lengths */
unsigned long int_len = (unsigned long) ustrlen (int_part);
unsigned long frac_len = (unsigned long) ustrlen (frac_part);
/* Allow for initial zeros */
while (int_part [0] == char_zero) {
int_part++;
int_len--;
}
/* Allow for exponent */
if (IS_nat_neg (expon)) {
expon = DEREF_nat (nat_neg_arg (expon));
exp_val = get_nat_value (expon);
if (exp_val > int_len) {
res_len = 0;
pre_len = exp_val - int_len;
} else {
res_len = int_len - exp_val;
pre_len = 0;
}
} else {
exp_val = get_nat_value (expon);
res_len = int_len + exp_val;
pre_len = 0;
}
/* Allow for initial zeros in fractional part */
if (int_part [0] == 0) {
while (frac_part [0] == char_zero) {
frac_part++;
frac_len--;
if (res_len == 0) {
pre_len++;
} else {
res_len--;
}
}
if (frac_part [0] == 0) {
/* Zero floating literal */
res = small_nat [0];
return (res);
}
}
/* Extreme values are target dependent */
if (pre_len > 6) return (NULL_nat);
if (res_len > 6) return (NULL_nat);
if (exp_val == EXTENDED_MAX) return (NULL_nat);
/* Construct integer string */
j = 0;
n = res_len;
for (i = 0; i < pre_len; i++) {
if (j < n) {
result [j] = char_zero;
j++;
}
}
for (i = 0; i < int_len; i++) {
if (j < n) {
result [j] = int_part [i];
j++;
}
}
for (i = 0; i < frac_len; i++) {
if (j < n) {
result [j] = frac_part [i];
j++;
}
}
for (; j < n; j++) result [j] = char_zero;
result [n] = 0;
/* Calculate the result */
res = eval_digits (result, result + res_len, base);
UNUSED (mode);
return (res);
}
/*
* EVALUATE A UNICODE CHARACTER
*
* This routine evaluates the unicode character with prefix c, consisting
* of n hex digits, given by ps. ps is advanced to the position following
* the hex digits.
*/
unsigned long
eval_unicode(int c, unsigned n, int *pc, string *ps, ERROR *err)
{
string r = *ps;
unsigned long u;
unsigned base = 16;
string s = check_digits (r, base);
unsigned m = (unsigned) (s - r);
if (m < n) {
add_error (err, ERR_lex_charset_len (c, n));
} else {
s = r + n;
}
*ps = s;
u = eval_char_digits (r, s, base);
add_error (err, ERR_lex_charset_replace (u));
if (u < 0x20 || (u >= 0x7f && u <= 0x9f) || is_legal_char (u)) {
add_error (err, ERR_lex_charset_bad (u));
*pc = CHAR_SIMPLE;
} else {
if (u <= (unsigned long) 0xffff) *pc = CHAR_UNI4;
}
return (u);
}
/*
* GET A MULTI-BYTE CHARACTER FROM A STRING
*
* This routine returns the multi-byte character pointed to by the
* string s. It assigns the character type to pc.
*/
unsigned long
get_multi_char(string s, int *pc)
{
int i;
unsigned long n = 0;
for (i = MULTI_WIDTH - 1; i >= 1; i--) {
n = (n << 8) + (unsigned long) s [i];
}
*pc = (int) s [0];
return (n);
}
/*
* ADD A MULTI-BYTE CHARACTER TO A STRING
*
* This routine adds the multi-byte character n of type ch to the
* string s. A multi-byte character is represented by 5 characters.
* The first is a key describing how the character was described (a
* simple character, a hex or octal escape sequence, a unicode
* character etc.). The next four characters give the character value.
*/
void
add_multi_char(string s, unsigned long n, int ch)
{
int i;
s [0] = (character) ch;
for (i = 1; i < MULTI_WIDTH; i++) {
s [i] = (character) (n & 0xff);
n >>= 8;
}
if (n) report (crt_loc, ERR_lex_ccon_large ());
return;
}
/*
* CREATE A MULTI-BYTE STRING
*
* This routine creates a multi-byte string of length n in s from the
* string t of kind k.
*/
static void
make_multi_string(string s, string t, unsigned long n, unsigned k)
{
if (k & STRING_MULTI) {
n *= MULTI_WIDTH;
xumemcpy (s, t, (size_t) n);
} else {
unsigned long i;
for (i = 0; i < n; i++) {
add_multi_char (s, (unsigned long) *t, CHAR_SIMPLE);
s += MULTI_WIDTH;
t++;
}
}
return;
}
/*
* GET A MULTIBYTE CHARACTER FROM A STRING
*
* This routine reads a multibyte character from the string s (which
* ends at se). The value (as a wide character) is assigned to pc with
* the new value of s being returned. Note that this routine is not
* required in, for example, check_digits because the representation
* of a simple single byte character as a multibyte character comprises
* that single byte.
*/
#if FS_MULTIBYTE
static string
get_multibyte(string s, string se, unsigned long *pc)
{
wchar_t c;
int n = mbtowc (&c, s, (size_t) (se - s));
if (n > 0) {
/* Valid multibyte character */
*pc = (unsigned long) c;
s += n;
} else if (n == 0) {
/* Null character */
*pc = 0;
s++;
} else {
/* Invalid multibyte character */
report (crt_loc, ERR_lex_ccon_multibyte ());
*pc = (unsigned long) *(s++);
}
return (s);
}
#endif
/*
* ANALYSE A STRING OR CHARACTER LITERAL
*
* This routine analyses the string or character literal given by the
* string s (which ends at se). Only characters in the range [0,0xff]
* are assumed to be valid. Note that this is the routine which should
* do the mapping from the source character set to the execution
* character set (translation phase 5), however this is deferred until
* the string output routines.
*/
STRING
new_string_lit(string s, string se, int lex)
{
STRING res;
STRING prev;
int multi = 0;
int overflow = 0;
unsigned long len = 0;
unsigned kind = STRING_NONE;
#if FS_MULTIBYTE
int multibyte = allow_multibyte;
#endif
size_t sz = (size_t) (se - s) + 1;
string str = ustring_alloc (sz);
/* Find string type */
switch (lex) {
case lex_char_Hlit :
case lex_char_Hexp : {
kind = STRING_CHAR;
break;
}
case lex_wchar_Hlit :
case lex_wchar_Hexp : {
kind = (STRING_WIDE | STRING_CHAR);
break;
}
case lex_string_Hlit :
case lex_string_Hexp : {
kind = STRING_NONE;
break;
}
case lex_wstring_Hlit :
case lex_wstring_Hexp : {
kind = STRING_WIDE;
break;
}
}
if (do_string) dump_string_lit (s, se, kind);
/* Scan string replacing escape sequences */
while (s != se) {
unsigned long c;
int ch = CHAR_SIMPLE;
#if FS_MULTIBYTE
if (multibyte) {
s = get_multibyte (s, se, &c);
} else {
c = (unsigned long) *(s++);
}
#else
c = (unsigned long) *(s++);
#endif
if (c == char_backslash) {
if (s != se) {
/* Unterminated string literals already reported */
character e = NONE;
#if FS_MULTIBYTE
if (multibyte) {
s = get_multibyte (s, se, &c);
} else {
c = (unsigned long) *(s++);
}
#else
c = (unsigned long) *(s++);
#endif
if (c < NO_CHAR) e = escape_sequences [c];
switch (e) {
case OCTE : {
/* Octal escape sequences */
unsigned base = 8;
string r = s - 1;
s = check_digits (r, base);
if (s > r + 3) s = r + 3;
c = eval_char_digits (r, s, base);
ch = CHAR_OCTAL;
break;
}
case HEXE : {
/* Hexadecimal escape sequences */
unsigned base = 16;
string r = s;
s = check_digits (r, base);
if (s == r) {
int i = (int) c;
report (crt_loc, ERR_lex_ccon_hex (i));
} else {
c = eval_char_digits (r, s, base);
}
ch = CHAR_HEX;
break;
}
case UNI4 : {
/* Short unicode escape sequences */
if (allow_unicodes) {
string r = s;
unsigned d = 4;
ERROR err = NULL_err;
c = eval_unicode (char_u, d, &ch, &r, &err);
if (!IS_NULL_err (err)) {
report (crt_loc, err);
}
ch = CHAR_UNI4;
s = r;
break;
}
goto illegal_lab;
}
case UNI8 : {
/* Long unicode escape sequences */
if (allow_unicodes) {
string r = s;
unsigned d = 8;
ERROR err = NULL_err;
c = eval_unicode (char_U, d, &ch, &r, &err);
if (!IS_NULL_err (err)) {
report (crt_loc, err);
}
ch = CHAR_UNI8;
s = r;
break;
}
goto illegal_lab;
}
case NONE :
illegal_lab : {
/* Illegal escape sequences */
int i = (int) c;
report (crt_loc, ERR_lex_ccon_escape (i));
break;
}
default : {
/* Simple escape sequences */
c = (unsigned long) e;
break;
}
}
}
}
if ((ch != CHAR_SIMPLE || c >= 256) && !multi) {
/* Convert to multi-character format */
string a;
sz *= MULTI_WIDTH;
a = ustring_alloc (sz);
make_multi_string (a, str, len, kind);
if (len) {
len *= MULTI_WIDTH;
if (len == 0) overflow = 1;
}
if (c >= 256) {
/* Mark fat strings */
if (!(kind & STRING_WIDE)) {
if (ch == CHAR_UNI4 || ch == CHAR_UNI8) {
/* EMPTY */
} else {
report (crt_loc, ERR_lex_ccon_large ());
}
}
kind |= STRING_FAT;
}
kind |= STRING_MULTI;
multi = 1;
str = a;
}
if (multi) {
add_multi_char (str + len, c, ch);
len += MULTI_WIDTH;
} else {
str [len++] = (character) c;
}
if (len == 0) overflow = 1;
}
if (multi) {
add_multi_char (str + len, (unsigned long) 0, CHAR_OCTAL);
len /= MULTI_WIDTH;
} else {
str [len] = 0;
}
if (overflow) len = ULONG_MAX;
if (!check_value (OPT_VAL_string_length, len)) {
len = option_value (OPT_VAL_string_length);
if (multi) {
unsigned long n = MULTI_WIDTH * len;
add_multi_char (str + n, (unsigned long) 0, CHAR_OCTAL);
} else {
str [len] = 0;
}
}
MAKE_str_simple (len, str, kind, res);
prev = share_string_lit (res);
if (!EQ_str (prev, res)) {
/* Share string literals */
unsigned long v;
DESTROY_str_simple (destroy, res, len, str, kind, v, res);
ustring_free (str);
UNUSED (res);
UNUSED (len);
UNUSED (kind);
UNUSED (v);
res = prev;
}
return (res);
}
/*
* ARE TWO STRINGS EQUAL?
*
* This routine checks whether the string literals s and t are equal.
*/
int
eq_string_lit(STRING s, STRING t)
{
string as, at;
unsigned ks, kt;
unsigned long ns, nt;
if (EQ_str (s, t)) return (1);
ks = DEREF_unsigned (str_simple_kind (s));
kt = DEREF_unsigned (str_simple_kind (t));
ns = DEREF_ulong (str_simple_len (s));
nt = DEREF_ulong (str_simple_len (t));
if (ks == kt && ns == nt) {
as = DEREF_string (str_simple_text (s));
at = DEREF_string (str_simple_text (t));
if (as == at) return (1);
if (ks & STRING_MULTI) ns *= MULTI_WIDTH;
if (xumemcmp (as, at, (size_t) ns) == 0) return (1);
}
return (0);
}
/*
* CONCATENATE TWO STRING LITERALS
*
* This routine concatenates the string literals s and t.
*/
STRING
concat_string_lit(STRING s, STRING t)
{
string c;
STRING res;
STRING prev;
unsigned kc;
size_t sz;
unsigned long nc;
string a = DEREF_string (str_simple_text (s));
string b = DEREF_string (str_simple_text (t));
unsigned ka = DEREF_unsigned (str_simple_kind (s));
unsigned kb = DEREF_unsigned (str_simple_kind (t));
unsigned long na = DEREF_ulong (str_simple_len (s));
unsigned long nb = DEREF_ulong (str_simple_len (t));
/* Form the result literal */
if (na == 0) return (t);
if (nb == 0) return (s);
nc = na + nb;
if (nc < na || nc < nb) nc = ULONG_MAX;
if (!check_value (OPT_VAL_string_length, nc)) {
nc = option_value (OPT_VAL_string_length);
nb = nc - na;
}
kc = (ka | kb);
if (kc & STRING_MULTI) {
/* Multi-byte strings */
unsigned long sa = MULTI_WIDTH * na;
unsigned long sc = MULTI_WIDTH * nc;
sz = (size_t) (sc + MULTI_WIDTH);
c = ustring_alloc (sz);
make_multi_string (c, a, na, ka);
make_multi_string (c + sa, b, nb, kb);
add_multi_char (c + sc, (unsigned long) 0, CHAR_OCTAL);
} else {
/* Simple strings */
sz = (size_t) (nc + 1);
c = ustring_alloc (sz);
xumemcpy (c, a, (size_t) na);
xumemcpy (c + na, b, (size_t) nb);
c [nc] = 0;
}
MAKE_str_simple (nc, c, kc, res);
prev = share_string_lit (res);
if (!EQ_str (prev, res)) {
/* Share string literals */
unsigned long v;
DESTROY_str_simple (destroy, res, nc, c, kc, v, res);
ustring_free (c);
UNUSED (res);
UNUSED (nc);
UNUSED (kc);
UNUSED (v);
res = prev;
}
return (res);
}
/*
* FIND THE SHARED COPY OF A STRING LITERAL
*
* This routine is used to implement shared string literals. It returns
* the canonical copy of s (i.e. the first string equal to s for which
* the routine was called).
*/
STRING
share_string_lit(STRING s)
{
string a = DEREF_string (str_simple_text (s));
unsigned long h = (hash (a) % HASH_STRING_SIZE);
STRING p = string_hash_table [h];
STRING t = p;
while (!IS_NULL_str (t)) {
if (eq_string_lit (t, s)) return (t);
t = DEREF_str (str_next (t));
}
COPY_str (str_next (s), p);
string_hash_table [h] = s;
return (s);
}
/*
* GET THE NEXT CHARACTER FROM A STRING
*
* This routine returns the next character from the string s, using
* the tok field as a counter. The character type is assigned to pc,
* including CHAR_NONE to indicate the end of the string.
*/
unsigned long
get_string_char(STRING s, int *pc)
{
unsigned long c;
unsigned long i = DEREF_ulong (str_simple_tok (s));
unsigned long n = DEREF_ulong (str_simple_len (s));
if (i < n) {
string text = DEREF_string (str_simple_text (s));
unsigned kind = DEREF_unsigned (str_simple_kind (s));
if (kind & STRING_MULTI) {
c = get_multi_char (text + MULTI_WIDTH * i, pc);
} else {
c = (unsigned long) text [i];
*pc = CHAR_SIMPLE;
}
} else {
c = 0;
*pc = CHAR_NONE;
}
COPY_ulong (str_simple_tok (s), i + 1);
return (c);
}
/*
* FIND A CHARACTER LITERAL
*
* This routine returns the character value corresponding to the character
* literal expression e.
*/
int
get_char_value(EXP e)
{
int c = char_illegal;
if (!IS_NULL_exp (e)) {
if (IS_exp_int_lit (e)) {
NAT n = DEREF_nat (exp_int_lit_nat (e));
if (IS_nat_calc (n)) {
e = DEREF_exp (nat_calc_value (n));
}
}
if (IS_exp_cast (e)) {
e = DEREF_exp (exp_cast_arg (e));
if (IS_exp_int_lit (e)) {
NAT n = DEREF_nat (exp_int_lit_nat (e));
if (IS_nat_calc (n)) {
e = DEREF_exp (nat_calc_value (n));
}
}
}
if (IS_exp_char_lit (e)) {
STRING s = DEREF_str (exp_char_lit_str (e));
unsigned kind = DEREF_unsigned (str_simple_kind (s));
if (!(kind & STRING_MULTI)) {
unsigned long len = DEREF_ulong (str_simple_len (s));
if (len == 1) {
string t = DEREF_string (str_simple_text (s));
c = (int) *t;
}
}
}
}
return (c);
}
/*
* EVALUATE A CHARACTER LITERAL
*
* This routine evaluates the character literal str by mapping it to
* its ASCII representation. The value is stored in the tok field
* (the fact that LINK_NONE equals EXTENDED_MAX is convenient, but not
* essential).
*/
NAT
eval_char_lit(STRING str)
{
NAT n;
unsigned long v = DEREF_ulong (str_simple_tok (str));
if (v == LINK_NONE) {
unsigned long i;
string s = DEREF_string (str_simple_text (str));
unsigned long len = DEREF_ulong (str_simple_len (str));
unsigned kind = DEREF_unsigned (str_simple_kind (str));
if (kind & STRING_MULTI) {
NAT b = make_small_nat (256);
n = small_nat [0];
for (i = 0; i < len; i++) {
NAT d;
int ch = CHAR_SIMPLE;
unsigned long c = get_multi_char (s, &ch);
if (ch == CHAR_SIMPLE) c = to_ascii (c, &ch);
d = make_nat_value (c);
n = binary_nat_op (exp_mult_tag, n, b);
n = binary_nat_op (exp_plus_tag, n, d);
s += MULTI_WIDTH;
}
} else {
n = small_nat [0];
for (i = 0; i < len; i++) {
int ch = CHAR_SIMPLE;
unsigned long c = (unsigned long) *s;
c = to_ascii (c, &ch);
n = make_nat_literal (n, (unsigned) 256, (unsigned) c);
s++;
}
}
v = get_nat_value (n);
if (v != EXTENDED_MAX) {
/* Store calculated value */
COPY_ulong (str_simple_tok (str), v);
}
} else {
/* Use stored value */
n = make_nat_value (v);
}
return (n);
}
/*
* FIND A CHARACTER REPRESENTATION TYPE
*
* In the case where a character literal type doesn't fit into its type
* then this routine gives a type in which the literal value can be
* constructed and then converted into its underlying type.
*/
TYPE
find_char_type(NAT n)
{
TYPE t;
int fit = 0;
string str = NULL_string;
t = find_literal_type (n, BASE_OCTAL, SUFFIX_NONE, str, &fit);
return (t);
}
/*
* CREATE A STRING OR CHARACTER LITERAL EXPRESSION
*
* This routine turns a string or character literal into an expression.
* Note that the type of a normal character literal varies between C
* (where it is a char cast to an int) and C++ (where it stays as a
* char), and also that a string, or wide string, literal is an lvalue
* of array type.
*/
EXP
make_string_exp(STRING s)
{
EXP e;
string text = DEREF_string (str_simple_text (s));
unsigned long len = DEREF_ulong (str_simple_len (s));
unsigned kind = DEREF_unsigned (str_simple_kind (s));
if (kind & STRING_CHAR) {
int fits = 0;
int digit = -1;
TYPE t0, t1, t2;
NAT n = NULL_nat;
ERROR err = NULL_err;
if (kind & STRING_WIDE) {
t0 = type_wchar_lit;
t1 = t0;
t2 = t0;
} else if (len <= 1) {
t0 = type_char;
t1 = t0;
t2 = type_char_lit;
} else {
report (crt_loc, ERR_lex_ccon_multi (s));
t0 = type_mchar_lit;
t1 = t0;
t2 = t0;
}
if (len == 0) {
fits = 1;
n = small_nat [0];
COPY_ulong (str_simple_tok (s), 0);
} else if (len == 1) {
if (kind & STRING_MULTI) {
if (!(kind & STRING_FAT)) {
/* Simple octal or hex escape sequence */
unsigned long v = DEREF_ulong (str_simple_tok (s));
if (v == LINK_NONE) {
int ch = CHAR_SIMPLE;
v = get_multi_char (text, &ch);
if (ch == CHAR_OCTAL || ch == CHAR_HEX) {
if (v < 128) fits = 1;
n = make_nat_value (v);
COPY_ulong (str_simple_tok (s), v);
}
} else {
if (v < 128) fits = 1;
n = make_nat_value (v);
}
}
} else {
/* Single character */
character c = text [0];
if (in_hash_if_exp) {
/* Evaluate character value immediately */
unsigned long v = DEREF_ulong (str_simple_tok (s));
if (v == LINK_NONE) {
int ch = CHAR_SIMPLE;
v = (unsigned long) c;
v = to_ascii (v, &ch);
COPY_ulong (str_simple_tok (s), v);
}
if (v < 128) fits = 1;
n = make_nat_value (v);
} else {
if (c >= char_zero && c <= char_nine) {
/* Allow for digits */
digit = (int) (c - char_zero);
}
}
}
}
if (IS_NULL_nat (n)) {
/* Make character literal expression */
MAKE_exp_char_lit (t0, s, digit, e);
MAKE_nat_calc (e, n);
} else {
if (!fits && check_nat_range (t0, n) != NAT_FIT) {
/* Value doesn't fit into t0 */
t0 = find_char_type (n);
}
}
MAKE_exp_int_lit (t0, n, exp_char_lit_tag, e);
if (!EQ_type (t0, t1)) {
/* Convert from t0 to t1 */
e = make_cast_nat (t1, e, &err, CAST_STATIC);
}
if (!EQ_type (t1, t2)) {
/* Convert from t1 to t2 */
e = make_cast_nat (t2, e, &err, CAST_IMPLICIT);
}
if (!IS_NULL_err (err)) report (crt_loc, err);
} else {
/* String literals */
TYPE t;
NAT n = make_nat_value (len + 1);
if (kind & STRING_WIDE) {
t = type_wstring_lit;
} else {
t = type_string_lit;
}
MAKE_type_array (cv_lvalue, t, n, t);
MAKE_exp_string_lit (t, s, e);
}
return (e);
}
/*
* CREATE A BOOLEAN LITERAL EXPRESSION
*
* This routine creates a boolean literal expression given by the boolean
* value b (which should be one of the values BOOL_FALSE and BOOL_TRUE
* defined in literal.h).
*/
EXP
make_bool_exp(unsigned b, unsigned tag)
{
EXP e;
NAT n = small_nat [b];
MAKE_exp_int_lit (type_bool, n, tag, e);
return (e);
}
/*
* TEST A BOOLEAN LITERAL EXPRESSION
*
* This routine is the reverse of the one above. It returns the boolean
* value (BOOL_FALSE, BOOL_TRUE or BOOL_UNKNOWN) corresponding to the
* expression e.
*/
unsigned
test_bool_exp(EXP e)
{
NAT n = DEREF_nat (exp_int_lit_nat (e));
if (IS_nat_small (n)) {
unsigned b = DEREF_unsigned (nat_small_value (n));
if (b == BOOL_FALSE) return (BOOL_FALSE);
if (b == BOOL_TRUE) return (BOOL_TRUE);
}
return (BOOL_UNKNOWN);
}