#include "global.h"
#include "source.h"
#include "lex.h"
#include "list.h"
#include "irep.h"
#include "parser.h"
#include "lexparse.h"
// The LexGen parser expects a specification as follows and builds an
// internal representation for subsequent processing.
//
// specification ::= [ '%prelude' code '%%' ]
// { [ '%tokens' | '%ignore' ] rule }
// [ '%postlude' code '%%' ].
// rule ::= ruleHead '=' ruleBody [ %filter' ... '%%' ] '.'.
// ruleHead ::= 'identifier'.
// ruleBody ::= expression.
// expression ::= termSeq [ '|' expression ].
// termSeq ::= term [ termSeq ]
// term ::= char [ '-' char ]
// | identifier
// | '{' expression '}'
// | '[' expression ']'
// | '(' expression ')' .
// The member functions specification() and rule() have structures
// consistent with that derived from the language grammar.
void LexGenParser::specification()
{
if (symbol() == PRELUDEMARK) {
prelude = scanCode();
nextSymbol();
}
while (symbol() == IDENTIFIER ||
symbol() == IGNOREMARK || symbol() == TOKENMARK)
rule();
if (symbol() == POSTLUDEMARK) {
postlude = scanCode();
nextSymbol();
}
}
void LexGenParser::rule()
{
LexRule *head;
if (symbol() == Lexer::IGNOREMARK) {
group = IGNORETYPE;
nextSymbol();
} else if (symbol() == Lexer::TOKENMARK) {
group = TOKENTYPE;
nextSymbol();
}
if (!(head = LexRule::ruleSet.find(spelling())))
head = new LexRule(spelling());
if (group == TOKENTYPE)
head->token();
else if (group == IGNORETYPE)
head->ignored();
nextSymbol();
expect(EQUAL);
head->defn(expression());
if (symbol() == FILTERMARK) {
// The filter mark allows for the execution of code hooks before the
// generated lexer returns a token. The scanCode() member function is
// invoked to scan such code fragments.
head->filterIs(scanCode());
nextSymbol();
}
expect(Lexer::PERIOD);
}
// The member functions term(), termSeq() and expression() contain code
// to construct the appropriate internal representation for subsequent
// conversion to an appropriate Finite State Acceptor.
// Lexical items, nonterminal rules, alternatives and repetitive rules
// have their respective representation: LexCharNode, LexNonTermNode,
// LexSelectNode and LexRepeatNode.
LexNode *LexGenParser::term()
{
switch (symbol()) {
// A lexical term returns a LexCharNode. Where a character range is
// present, it is appropriately recorded.
case Lexer::CHARCONST: {
char c = spelling()[0];
nextSymbol();
if (symbol() == Lexer::RANGE) {
nextSymbol();
mustBe(Lexer::CHARCONST);
char range = spelling()[0];
nextSymbol();
return(new LexCharNode(c, range));
} else
return(new LexCharNode(c));
}
// An identifier term returns a LexNonTermNode with a reference to the
// original definition. However, names may be forward defined. In this
// case, a stub definition is inserted and its body will be elaborated
// when a definition is ultimately encountered.
case Lexer::IDENTIFIER: {
LexNode *n;
LexRule *rule;
if (!(rule = LexRule::ruleSet.find(spelling())))
rule = new LexRule(spelling());
n = new LexNonTermNode(rule);
nextSymbol();
return n;
}
// A left brace indicates a repetitive rule. It returns a LexRepeatNode
// with a reference to the repeated expression. As with most recursive
// descent parsers, this is a recursive call and it accommodates all
// nesting levels.
case Lexer::LEFTBRACE: {
LexNode *n;
bool atLeastOne = FALSE;
LexNode *sep = 0;
nextSymbol();
n = expression();
if (symbol() == Lexer::COMMA) {
nextSymbol();
sep = expression();
}
expect(RIGHTBRACE);
if (symbol() == Lexer::PLUS) {
atLeastOne = TRUE;
nextSymbol();
} else if (symbol() == Lexer::STAR)
nextSymbol();
return(new LexRepeatNode(n, sep, atLeastOne));
}
// A left bracket indicates an optional rule. It returns a
// LexSelectNode with a reference to the optional expression. The other
// option is left as 0. As with the previous repetition rule, this
// is a recursive call and it accommodates all nesting levels.
case Lexer::LEFTBRACKET: {
LexNode *n;
nextSymbol();
n = expression();
expect(RIGHTBRACKET);
return(new LexSelectNode(n, 0));
}
// A left parenthesis merely indicates precedence and no special
// processing is required.
case Lexer::LEFTPARENT: {
LexNode *n;
nextSymbol();
n = expression();
expect(Lexer::RIGHTPARENT);
return n;
}
// double quoted string
case Lexer::DQUOTE: {
return qwString();
}
// The following symbols indicate the end of an expression sequence
// and thus merely result 0.
case Lexer::RIGHTBRACE:
case Lexer::RIGHTBRACKET:
case Lexer::RIGHTPARENT:
case Lexer::ALTERNATE:
case Lexer::COMMA:
case Lexer::FILTERMARK:
case Lexer::PERIOD:
return 0;
}
// Other symbols not covered in the above list is erroneous and is
// reported as such:
parseError("term expected");
nextSymbol();
return 0;
}
// Recognising a sequence of terms involves processing the first term,
// and subsequently checking for subsequent terms via a recursive call
// and linking via the next pointer of LexNode instances.
LexNode *LexGenParser::termSeq()
{
LexNode *n;
n = term();
if (!n)
return 0;
switch (symbol()) {
// These symbols continue a terminal sequence, and in turn, continue
// processing.
case Lexer::IDENTIFIER:
case Lexer::CHARCONST:
case Lexer::LEFTBRACE:
case Lexer::LEFTBRACKET:
case Lexer::LEFTPARENT:
while (n->next != 0)
n = n->next;
n->next = termSeq();
}
return n;
}
LexNode *LexGenParser::qwString() {
expect(Lexer::DQUOTE); // skip opening quote
LexNode *first = 0;
LexNode *last = 0;
for (int i = 0; spelling()[i]; ++i) {
LexNode *n = new LexCharNode(spelling()[i]);
if (last)
last->next = n;
last = n;
if (!first)
first = n;
}
nextSymbol(); // skip closing quote
expect(Lexer::DQUOTE);
if (!first)
parseError("empty quoted strings are not allowed");
return first;
}
// Since the alternative operator has the weakest precedence, it is
// handled separately.
LexNode *LexGenParser::expression()
{
LexNode *n;
n = termSeq();
if (symbol() == Lexer::ALTERNATE) {
nextSymbol();
return(new LexSelectNode(n, expression()));
}
return n;
}
char *LexGenParser::prelude = 0;
char *LexGenParser::postlude = 0;
const char *LexGenParser::currentfile = 0;
syntax highlighted by Code2HTML, v. 0.9.1