/*
 * Scanner or Lexical Analyzer
 */
#include <sys/types.h>
#include "ansi.h"
#include "host.h"
#include "files.h"
#include "hash.h"
#include "buffer.h"
#include "cpp.h"
#include "cpp_hide.h"
#include "allocate.h"
#include "il.h"
#include "nodeop.h"
#include "types.h"
#include "stab.h"
#include "y.tab.h"
#include "config.h"
#include "type_util.h"

/* Replace any earlier definition of NULL with the plain constant 0. */
#undef NULL
#define NULL			0

/* File name and line number corresponding to the current position yypos. */
#define FN				file_name(yypos)
#define LN				line_number(yypos)

/* Current source position; bumped at each end of line and reset by line directives. */
file_pos_t yypos;
/* One-character lookahead: the next character not yet consumed by the scanner. */
static int yyc;
/* Set by yyskip(); makes the next yylex() call run skip() instead of scanning normally. */
static int skipping_compound_statements;
/* Last _Ident node produced on the current line (NULL if none); comments get attached to it. */
static node_t *last_ident;

void
yyerror(msg)
	char *msg;
{
	error(FN,LN,msg);
}

/*
 * Intentionally empty: this function takes no arguments and does nothing.
 */
void
yysyntax_corrector()
{
}

/*
 * Intentionally empty: this function takes no arguments and does nothing.
 */
void
td()
{
}

/*
 * Prime the scanner: fetch the first input character into the
 * one-character lookahead yyc.
 */
void
yylex_init()
{
	int first;

	first = cpp_getc();
	yyc = first;
}

/*
 * Bookkeeping at the end of a source line: forget the identifier that
 * a trailing comment could attach to and advance the position by one.
 */
static void
end_line()
{
	last_ident = NULL;
	yypos += 1;
}

/*
 * Read the exponent that follows a magnitude character.
 *
 * c    - current character; if it is not a magnitude character, *val is
 *        set to 1 and c is returned unchanged.
 * val  - receives the signed decimal exponent.
 *
 * Returns the first character after the exponent digits.
 */
static int
magnitude(c, val)
	int c;
	host_int_t *val;
{
	host_int_t digits = 0;
	int sign = 1;

	if (! is_magnitude(c)) {
		*val = 1;
		return c;
	}

	/* step past the magnitude character, then an optional sign */
	c = cpp_getc();
	switch (c) {
	  case '-':
		sign = -1;
		/* fallthrough */
	  case '+':
		c = cpp_getc();
		break;
	  default:
		break;
	}

	while (is_digit(c)) {
		digits = digits * 10 + (c - '0');
		c = cpp_getc();
	}

	*val = digits * sign;
	return c;
}

static int
grok_number()
{
	host_int_t val;

	if (yyc == '0') {
		val = 0;
		yyc = cpp_getc();
		if (yyc == 'x' || yyc == 'X') {
			for (;;) {
				yyc = cpp_getc();
				if (is_digit(yyc)) {
					val = (val<<4) + (yyc - '0');
				}
				else if (yyc >= 'A' && yyc <= 'F') {
					val = (val<<4) + 10 + (yyc - 'A');
				}
				else if (yyc >= 'a' && yyc <= 'f') {
					val = (val<<4) + 10 + (yyc - 'a');
				}
				else {
					break;
				}
			}
		}
		else {
			while (is_octal_digit(yyc)) {
				val = (val<<3) + (yyc - '0');
				yyc = cpp_getc();
			}
		}
	}
	else {
		val = yyc - '0';
		for (;;) {
			yyc = cpp_getc();
			if (! is_digit(yyc)) break;
			val = val * 10 + (yyc - '0');
		}
	}

	if (int_modifier(yyc)) {
		yyc = cpp_getc();
	}

	if (yyc == '.') {
		host_float_t dval, d;
		int tmp;

		dval = (host_float_t) val;
		d = 0.1;

		for (;;) {
			yyc = cpp_getc();
			if (! is_digit(yyc)) break;
			tmp = yyc - '0';
			dval = dval + (d * (host_float_t)tmp);
			d = d * 0.1;
		}


		/* handle magnitude */
		if (is_magnitude(yyc)) {
			host_int_t vm;
			host_float_t m;

			yyc = magnitude(yyc, &vm);

			if (vm > 0) {
				for (m = 1.0; vm; vm--) {
					m *= 10.0;
				}
			}
			else {
				for (m = 1.0; vm; vm++) {
					m /= 10.0;
				}
			}

			dval *= m;
		}

		if (float_modifier(yyc)) {
			yyc = cpp_getc();
		}

		yylval.nod = new_node(_FP_Number, dval);
		return FLOATING_CONSTANT;
	}

	while (int_modifier(yyc)) {
		yyc = cpp_getc();
	}

	yylval.nod = new_node(_Int_Number, val);
	return INTEGER_CONSTANT;
}

static int
grok_ident()
{
#ifdef PUBLIC
	struct resword {char *name; short token;};
	extern struct resword *c_rsvd();
	struct resword *r;
#endif
	char id[1024], *p;
	symbol_t *sym;
	int keywd;

	for (p = id; is_alpha_numeric(yyc); yyc = cpp_getc()) {
		*p++ = yyc;
	}
	*p = 0;
		
#ifdef PUBLIC
	if ((r = c_rsvd(id, p-id)) != NULL) return r->token;
#else
	if ((keywd = c_rsvd(id)) != -1) return keywd;
#endif

	if (sym = find_sym(id)) {
		if (is_typedef(sym)) {
			yylval.typ = copy_type(sym->sym_type);
			yylval.typ->type_base = sym;
			yylval.typ->_typedef = 0;
			return TYPEDEF_NAME;
		}
		if (is_enum_literal(sym)) {
			yylval.nod = new_node(_Sym, sym);
			return ENUMERATION_CONSTANT;
		}
	}

	yylval.nod = new_node(_Ident, new_string(id));
	last_ident = yylval.nod;
	return IDENTIFIER;
}

/*
 * Translate the escape sequence selected by c into its character value.
 *
 * c   - the character that followed the backslash.  The lookahead yyc
 *       must already hold the character after c.
 *
 * Simple escapes map directly to their value.  An octal escape starts
 * with the digit in c and takes up to two more octal digits from the
 * input (three in total).  A hex escape ('x') takes up to two hex
 * digits from the input.  Digits consumed here are removed from the
 * input, leaving yyc at the first character after the escape.
 *
 * Any other character is returned unchanged.
 */
static int
escaped_char(c)
	int c;
{
	int val, i;

	/* dispatch on the escape character itself, not on the lookahead */
	switch (c) {
	  case 'n':		return '\n';
	  case 't':		return '\t';
	  case 'v': 	return '\v';
	  case 'b': 	return '\b';
	  case 'r': 	return '\r';
	  case 'f': 	return '\f';
	  case 'a': 	return '\a';
	  case '?': 	return '\?';
	  case '\'': 	return '\'';
	  case '\"': 	return '\"';
	  case '\\': 	return '\\';

	  case '0':
	  case '1':
	  case '2':
	  case '3':
	  case '4':
	  case '5':
	  case '6':
	  case '7':
		/* c is the first octal digit; at most two more may follow */
		val = c - '0';
		for (i = 1; i < 3 && is_octal_digit(yyc); i++, yyc = cpp_getc()) {
			val = val * 8 + (yyc - '0');
		}
		return val;

	  case 'x':
		for (i = 0, val = 0; is_hex_digit(yyc) && i < 2; i++, yyc = cpp_getc()) {
			val *= 16;
			if (is_digit(yyc)) {
				val += (yyc - '0');
			}
			else if (yyc <= 'F') {
				/* upper-case hex letter */
				val += (yyc - ('A' - 10));
			}
			else {
				/* lower-case hex letter */
				val += (yyc - ('a' - 10));
			}
		}
		return val;

	  default:
		break;
	}

	return c;
}

/*
 * Scan the body of a character constant; the opening quote has already
 * been consumed and yyc holds the first character inside it.
 *
 * Each character (or translated escape) is shifted into the value eight
 * bits at a time.  A backslash followed by a newline is dropped and the
 * position advanced.  End of file or end of line before the closing
 * quote is reported as an error and ends the constant.
 *
 * Stores an _Int_Number node in yylval.nod and returns
 * CHARACTER_CONSTANT.
 */
static int
scan_char_const()
{
	host_int_t value = 0;
	int esc;

	while (1) {
		if (is_eof(yyc)) {
			error(FN,LN,"End of file while scanning char constant");
			break;
		}
		if (is_eol(yyc)) {
			error(FN,LN,"End of line while scanning char constant");
			break;
		}

		if (yyc == '\'') {
			/* closing quote */
			yyc = cpp_getc();
			break;
		}

		if (yyc != '\\') {
			/* ordinary character */
			value <<= 8;
			value |= yyc;
			yyc = cpp_getc();
			continue;
		}

		/* backslash: look at what follows */
		yyc = cpp_getc();
		switch (yyc) {
		  case '\n':
			yypos++;
			yyc = cpp_getc();
			break;
		  case '0': case 'x': case 'n': case 't': case 'v':
		  case 'b': case 'r': case 'f': case 'a': case '?':
		  case '\\': case '\'': case '\"':
			esc = yyc;
			yyc = cpp_getc();
			value <<= 8;
			value |= escaped_char(esc);
			break;
		  default:
			/* unrecognised escape: keep the character as is */
			value <<= 8;
			value |= yyc;
			yyc = cpp_getc();
			break;
		}
	}

	yylval.nod = new_node(_Int_Number, value);
	return CHARACTER_CONSTANT;
}

/*
 * Scan the body of a string literal; the opening quote has already been
 * consumed and yyc holds the first character inside it.
 *
 * Characters and translated escapes are appended to a buffer.  A
 * backslash followed by a newline is dropped and the position advanced.
 * End of file or end of line before the closing quote is reported as an
 * error and ends the string.
 *
 * Stores a _String node (text and length) in yylval.nod and returns
 * STRING.
 */
static int
scan_string()
{
	buffer_t buf;
	int esc, len;
	char *s;

	buf_init(&buf);

	while (1) {
		if (is_eof(yyc)) {
			error(FN,LN,"End of file while scanning string constant");
			break;
		}
		if (is_eol(yyc)) {
			error(FN,LN,"End of line while scanning string constant");
			break;
		}

		if (yyc == '"') {
			/* closing quote */
			yyc = cpp_getc();
			break;
		}

		if (yyc != '\\') {
			/* ordinary character */
			buf_add(&buf, yyc);
			yyc = cpp_getc();
			continue;
		}

		/* backslash: look at what follows */
		yyc = cpp_getc();
		switch (yyc) {
		  case '\n':
			yypos++;
			yyc = cpp_getc();
			break;
		  case '0': case 'x': case 'n': case 't': case 'v':
		  case 'b': case 'r': case 'f': case 'a': case '?':
		  case '\\': case '\'': case '\"':
			esc = yyc;
			yyc = cpp_getc();
			buf_add(&buf, escaped_char(esc));
			break;
		  default:
			/* unrecognised escape: keep the character as is */
			buf_add(&buf, yyc);
			yyc = cpp_getc();
			break;
		}
	}

	len = buf_count(&buf);
	s = buf_get_str(&buf);

	yylval.nod = new_node(_String, s, len);
	return STRING;
}

/*
 * Discard input starting at c until an end-of-line or end-of-file
 * character is reached; that character is returned, not consumed.
 */
static int
skip_to_end(c)
	int c;
{
	while (!is_eof(c) && !is_eol(c)) {
		c = cpp_getc();
	}
	return c;
}

/*
 * Starting with c, read past white-space characters and return the
 * first character that is not white space.
 */
int
skip_white(c)
	int c;
{
	for (; is_white(c); c = cpp_getc()) {
		/* nothing: just advance */
	}
	return c;
}


/*
 * Discard the remainder of a C comment.
 *
 * c - first character after the opening comment delimiter.
 *
 * Advances yypos at every end of line inside the comment.  Returns the
 * character following the closing delimiter, or 0 if the input ends
 * first.
 */
static int
skip_c_comment(c)
	int c;
{
	int prev;

	for (;;) {
		switch (classof(c)) {
		  case END_INPUT:
			return 0;

		  case END_OF_LINE:
			yypos++;
			/* fallthrough: then advance like any other character */
		  case DIGIT | XDIGIT:
		  case ALPHA:
		  case ALPHA | XDIGIT:
		  case MSTART:
		  case WHITE:
			c = cpp_getc();
			break;

		  case PUNCT:
			prev = c;
			c = cpp_getc();
			if (prev == '*' && c == '/') {
				/* closing delimiter found */
				return cpp_getc();
			}
			break;

		  default:
			assert(0);
			break;
		}
	}
}

/*
 * Read the remainder of a C comment, keeping its text.
 *
 * c - first character after the opening comment delimiter.
 *
 * The comment text (without the closing delimiter) is collected in a
 * buffer; if it is non-empty it is stored as the cmnt of last_ident.
 * last_ident is cleared afterwards.  yypos advances at every end of
 * line inside the comment.  Returns the character following the closing
 * delimiter, or 0 if the input ends first.
 */
static int
save_c_comment(c)
	int c;
{
	int prev, result;
	int done = 0;
	buffer_t buf;

	buf_init(&buf);

	while (!done) {
		switch (classof(c)) {
		  case END_INPUT:
			result = 0;
			done = 1;
			break;

		  case END_OF_LINE:
			buf_add(&buf, c);
			yypos++;
			c = cpp_getc();
			break;

		  case DIGIT | XDIGIT:
		  case ALPHA:
		  case ALPHA | XDIGIT:
		  case MSTART:
		  case WHITE:
			buf_add(&buf, c);
			c = cpp_getc();
			break;

		  case PUNCT:
			prev = c;
			c = cpp_getc();
			if (prev == '*' && c == '/') {
				/* closing delimiter: pick up the character after it */
				result = cpp_getc();
				done = 1;
			}
			else {
				buf_add(&buf, prev);
			}
			break;

		  default:
			assert(0);
			break;
		}
	}

	if (buf_count(&buf) > 0) {
		assert(last_ident->node_kind == _Ident);
		last_ident->node.id.cmnt = buf_get_str(&buf);
	}

	last_ident = NULL;
	return result;
}

/*
 * Consume a C comment starting at c.  When an identifier is pending in
 * last_ident the comment text is saved; otherwise it is discarded.
 * Returns whatever the chosen routine returns.
 */
static int
scan_comment(c)
	int c;
{
	return last_ident ? save_c_comment(c) : skip_c_comment(c);
}

static void
grok_directive()
{
	char fname[256];
	int i, line, nest;

	yyc = skip_white(cpp_getc());
	if (! is_digit(yyc)) {
		yyc = skip_to_end(yyc);
		return;
	}
	for (line = 0; is_digit(yyc); yyc = cpp_getc()) {
		line = line * 10 + (yyc - '0');
	}
	yyc = skip_white(yyc);
	if (yyc != '"') {
		yyc = skip_to_end(yyc);
		return;
	}
	for (i = 0; ; i++) {
		yyc = cpp_getc();
		if (yyc == '"' || is_eof(yyc) || is_eol(yyc)) break;
		fname[i] = yyc;
	}
	fname[i] = 0;
	if (i == 0) {
		yyc = skip_to_end(yyc);
		return;
	}

	yypos = set_file_pos(fname, line);
#ifndef CCPROTO
	init_unit(yypos);
#endif

	if (yyc != '"') {
		yyc = skip_to_end(yyc);
		return;
	}

	yyc = skip_white(cpp_getc());
	if (! is_digit(yyc)) {
		yyc = skip_to_end(yyc);
		return;
	}

	for (nest = 0; is_digit(yyc); yyc = cpp_getc()) {
		nest = nest * 10 + (yyc - '0');
	}

#ifndef CCPROTO
	unit_included(yypos, nest);
#endif

	yyc = skip_to_end(yyc);
	if (is_eol(yyc)) {
		/* DON'T increment yypos */
		yyc = cpp_getc();
	}
}

/*
 * Read tokens with yylex() until a '}' or end of input (0) is seen at
 * the current nesting level, and return that token.  Each '{' starts a
 * nested skip(); when the nested call ends on '}' the next token is
 * fetched and scanning continues.
 */
static int
skip()
{
	int tok;

	tok = yylex();

	while (tok != '}' && tok != 0) {
		if (tok == '{') {
			tok = skip();
			if (tok == '}') {
				tok = yylex();
			}
		}
		else {
			tok = yylex();
		}
	}

	return tok;
}

/*
 * Return the next token, or 0 at end of input.
 *
 * If yyskip() has requested it, the request flag is cleared and the
 * token returned by skip() is returned instead of scanning normally.
 *
 * Otherwise the class of the lookahead character yyc selects the action:
 * white space and line ends are consumed, digits start a number,
 * letters start an identifier/keyword, MSTART starts a directive line,
 * and punctuation is matched against the multi-character operators.
 * A punctuation character that starts no longer operator is returned as
 * its own character value.
 */
int
yylex()
{
	int token;

	if (skipping_compound_statements) {
		skipping_compound_statements = 0;
		token = skip();
		return token;
	}

	for (;;) {
		switch (classof(yyc)) {
		  case END_INPUT:
			return 0;
		  case WHITE:
			do {
				yyc = cpp_getc();
			} while (is_white(yyc));
			break;
		  case END_OF_LINE:
			end_line();
			yyc = cpp_getc();
			break;
		  case DIGIT | XDIGIT:
			return grok_number();
		  case ALPHA:
		  case ALPHA | XDIGIT:
			return grok_ident();
		  case MSTART:
			grok_directive();
			break;
		  case PUNCT:
			/* token is the punctuation character; yyc becomes the one after it */
			token = yyc;
			yyc = cpp_getc();
			switch (token)  {
			  case '.':
				if (yyc == '.') {
					yyc = cpp_getc();
					if (yyc == '.') {
						yyc = cpp_getc();
						return ELIPSIS;
					}
					return DOTDOT;
				}
				break;
			  case '\"':
				return scan_string();
			  case '\'':
				return scan_char_const();
			  case '&':
				switch (yyc) {
				  case '&':
					yyc = cpp_getc();
					return AND_OP;
				  case '=':
					yyc = cpp_getc();
					return AND_ASSIGN;
				}
				break;
			  case '^':
				if (yyc == '=') {
					yyc = cpp_getc();
					return XOR_ASSIGN;
				}
				break;
			  case '|':
				switch (yyc) {
				  case '|':
					yyc = cpp_getc();
					return OR_OP;
				  case '=':
					yyc = cpp_getc();
					return OR_ASSIGN;
				}
				break;
			  case '*':
				if (yyc == '=') {
					yyc = cpp_getc();
					return MUL_ASSIGN;
				}
				break;
			  case '/':
				switch (yyc) {
				  case '=':
					yyc = cpp_getc();
					return DIV_ASSIGN;
				  case '*':
					/* comment: consume it, then restart the outer loop without returning a token */
					yyc = scan_comment(cpp_getc());
					continue;
				}
				break;
			  case '%':
				if (yyc == '=') {
					yyc = cpp_getc();
					return MOD_ASSIGN;
				}
				break;
			  case '<':
				switch(yyc) {
				  case '<':
					yyc = cpp_getc();
					if (yyc == '=') {
						yyc = cpp_getc();
						return LEFT_ASSIGN;
					}
					return LEFT_OP;
				  case '=':
					yyc = cpp_getc();
					return LE_OP;
				}
				break;
			  case '>':
				switch(yyc) {
				  case '>':
					yyc = cpp_getc();
					if (yyc == '=') {
						yyc = cpp_getc();
						return RIGHT_ASSIGN;
					}
					return RIGHT_OP;
				  case '=':
					yyc = cpp_getc();
					return GE_OP;
				}
				break;
			  case '=':
				if (yyc == '=') {
					yyc = cpp_getc();
					return EQ_OP;
				}
				break;
			  case '!':
				if (yyc == '=') {
					yyc = cpp_getc();
					return NE_OP;
				}
				break;
			  case '+':
				switch (yyc) {
				  case '+':
					yyc = cpp_getc();
					return INC_OP;
				  case '=':
					yyc = cpp_getc();
					return ADD_ASSIGN;
				}
				break;
			  case '-':
				switch(yyc) {
				  case '-':
					yyc = cpp_getc();
					return DEC_OP;
				  case '=':
					yyc = cpp_getc();
					return SUB_ASSIGN;
				  case '>':
					yyc = cpp_getc();
					return PTR_OP;
				}
				break;
			}
			/* no multi-character operator matched: the character is its own token */
			return token;
		  default:
			assert(0);
			break;
		}
	}
}

/*
 * Ask the scanner to skip tokens: the next yylex() call will run skip()
 * and return its result instead of scanning normally.
 *
 * Always returns 0, so the declared int result is well defined.
 */
int
yyskip()
{
	skipping_compound_statements = 1;
	return 0;
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */