#include	"global.h"
#include	"source.h"
#include	"lex.h"
#include	"list.h"
#include	"irep.h"
#include	"parser.h"
#include	"lexparse.h"

// The LexGen parser expects a specification as follows and builds an
// internal representation for subsequent processing.
//
//      specification   ::= [ '%prelude' code '%%' ]
//                          { [ '%tokens' | '%ignore' ] rule }
//                          [ '%postlude' code '%%' ].
//      rule            ::= ruleHead '=' ruleBody [ %filter' ... '%%' ] '.'.
//      ruleHead        ::= 'identifier'.
//      ruleBody        ::= expression.
//      expression      ::= termSeq [ '|' expression ].
//      termSeq         ::= term [ termSeq ]
//      term            ::= char [ '-' char ]
//                        | identifier
//                        | '{' expression '}'
//                        | '[' expression ']'
//                        | '(' expression ')' .

// The member functions specification() and rule() have structures
// consistent with that derived from the language grammar.

void LexGenParser::specification()
{
	if (symbol() == PRELUDEMARK) {
		prelude = scanCode();
		nextSymbol();
	}
	while (symbol() == IDENTIFIER ||
		symbol() == IGNOREMARK || symbol() == TOKENMARK)
		rule();
	if (symbol() == POSTLUDEMARK) {
		postlude = scanCode();
		nextSymbol();
	}
}

void LexGenParser::rule()
{
	LexRule *head;

	if (symbol() == Lexer::IGNOREMARK) {
		group = IGNORETYPE;
		nextSymbol();
	} else if (symbol() == Lexer::TOKENMARK) {
		group = TOKENTYPE;
		nextSymbol();
	}
	if (!(head = LexRule::ruleSet.find(spelling())))
		head = new LexRule(spelling());
	if (group == TOKENTYPE)
		head->token();
	else if (group == IGNORETYPE)
		head->ignored();
	nextSymbol();
	expect(EQUAL);
	head->defn(expression());
	if (symbol() == FILTERMARK) {

// The filter mark allows for the execution of code hooks before the
// generated lexer returns a token.  The scanCode() member function is
// invoked to scan such code fragments.

		head->filterIs(scanCode());
		nextSymbol();
	}
	expect(Lexer::PERIOD);
}

// The member functions term(), termSeq() and expression() contain code
// to construct the appropriate internal representation for subsequent
// conversion to an appropriate Finite State Acceptor.

// Lexical items, nonterminal rules, alternatives and repetitive rules
// have their respective representation: LexCharNode, LexNonTermNode,
// LexSelectNode and LexRepeatNode.

LexNode *LexGenParser::term()
{
	switch (symbol()) {

// A lexical term returns a LexCharNode.  Where a character range is
// present, it is appropriately recorded.

	case Lexer::CHARCONST: {
                char c = spelling()[0];
                nextSymbol();
                if (symbol() == Lexer::RANGE) {
                        nextSymbol();
                        mustBe(Lexer::CHARCONST);
                        char range = spelling()[0];
                        nextSymbol();
                        return(new LexCharNode(c, range));
                } else
                        return(new LexCharNode(c));
	}

// An identifier term returns a LexNonTermNode with a reference to the
// original definition.  However, names may be forward defined.  In this
// case, a stub definition is inserted and its body will be elaborated
// when a definition is ultimately encountered.

	case Lexer::IDENTIFIER: {
		LexNode *n;
		LexRule *rule;

		if (!(rule = LexRule::ruleSet.find(spelling())))
			rule = new LexRule(spelling());
		n = new LexNonTermNode(rule);
		nextSymbol();
		return n;
	}

// A left brace indicates a repetitive rule.  It returns a LexRepeatNode
// with a reference to the repeated expression.  As with most recursive
// descent parsers, this is a recursive call and it accommodates all
// nesting levels.

	case Lexer::LEFTBRACE: {
		LexNode *n;
		bool atLeastOne = FALSE;
		LexNode *sep = 0;

		nextSymbol();
		n = expression();
		if (symbol() == Lexer::COMMA) {
			nextSymbol();
			sep = expression();
		}
		expect(RIGHTBRACE);
		if (symbol() == Lexer::PLUS) {
			atLeastOne = TRUE;
			nextSymbol();
		} else if (symbol() == Lexer::STAR)
			nextSymbol();
		return(new LexRepeatNode(n, sep, atLeastOne));
	}

// A left bracket indicates an optional rule.  It returns a
// LexSelectNode with a reference to the optional expression.  The other
// option is left as 0.  As with the previous repetition rule, this
// is a recursive call and it accommodates all nesting levels.

	case Lexer::LEFTBRACKET: {
		LexNode *n;

		nextSymbol();
		n = expression();
		expect(RIGHTBRACKET);
		return(new LexSelectNode(n, 0));
	}

// A left parenthesis merely indicates precedence and no special
// processing is required.

	case Lexer::LEFTPARENT: {
		LexNode *n;

		nextSymbol();
		n = expression();
		expect(Lexer::RIGHTPARENT);
		return n;
	}


	// double quoted string
	case Lexer::DQUOTE: {
		return qwString();
	}

// The following symbols indicate the end of an expression sequence
// and thus merely result 0.

	case Lexer::RIGHTBRACE:
	case Lexer::RIGHTBRACKET:
	case Lexer::RIGHTPARENT:
	case Lexer::ALTERNATE:
	case Lexer::COMMA:
	case Lexer::FILTERMARK:
	case Lexer::PERIOD:
		return 0;
	}

// Other symbols not covered in the above list is erroneous and is
// reported as such:

	parseError("term expected");
	nextSymbol();
	return 0;
}

// Recognising a sequence of terms involves processing the first term,
// and subsequently checking for subsequent terms via a recursive call
// and linking via the next pointer of LexNode instances.

LexNode *LexGenParser::termSeq()
{
	LexNode *n;

	n = term();
	if (!n)
		return 0;
	switch (symbol()) {

// These symbols continue a terminal sequence, and in turn, continue
// processing.

	case Lexer::IDENTIFIER:
	case Lexer::CHARCONST:
	case Lexer::LEFTBRACE:
	case Lexer::LEFTBRACKET:
	case Lexer::LEFTPARENT:
		while (n->next != 0)
			n = n->next;
		n->next = termSeq();
	}
	return n;
}

LexNode *LexGenParser::qwString() {

	expect(Lexer::DQUOTE); // skip opening quote

	LexNode *first = 0;
	LexNode *last = 0;
	for (int i = 0; spelling()[i]; ++i) {
		LexNode *n = new LexCharNode(spelling()[i]);
		if (last)
			last->next = n;
		last = n;
		if (!first)
			first = n;
	}
	nextSymbol(); // skip closing quote
	expect(Lexer::DQUOTE);

	if (!first)
		parseError("empty quoted strings are not allowed");

	return first;
}

// Since the alternative operator has the weakest precedence, it is
// handled separately.

LexNode *LexGenParser::expression()
{
	LexNode *n;

	n = termSeq();
	if (symbol() == Lexer::ALTERNATE) {
		nextSymbol();
		return(new LexSelectNode(n, expression()));
	}
	return n;
}

char *LexGenParser::prelude = 0;
char *LexGenParser::postlude = 0;
const char *LexGenParser::currentfile = 0;


syntax highlighted by Code2HTML, v. 0.9.1