#include "global.h"
#include "source.h"
#include "lex.h"
#include "list.h"
#include "nodes.h"
#include "parser.h"
#include "parparse.h"
// The ParGen parser expects a specification as follows and builds an
// internal representation for subsequent processing.
//
// specification ::= '%tokens' { IDENTIFIER }
// { ('%left' | '%right' | '%noassoc')
// { IDENTIFIER , ',' } }
// [ '%prelude' code '%%' ]
// [ '%inh' code '%%' ]
// '%rules' { rule }
// [ '%postlude' code '%%' ].
// rule ::= ruleHead [ '%attribute' code '%%' ]
// '=' ruleBody '.'.
// ruleHead ::= identifier.
// ruleBody ::= expression.
// expression ::= termSeq [ '|' expression ].
// termSeq ::= term [ termSeq ].
// term ::= identifier.
// The member functions specification() and rule() have structures
// consistent with that derived from the language grammar.
// The prelude and postlude markers allow for C++ code to be
// prepended and appended to the generated code. This is useful
// for libraries on which the semantic action code depends on.
void ParGenParser::specification()
{
int precedence = 0;
ParRule *term;
(new ParRule(SYSEOF))->defTerm();
expect(TOKENMARK);
while (symbol() == IDENTIFIER) {
if (!(term = ParRule::ruleSet.find(spelling())))
term = new ParRule(spelling());
term->defTerm();
nextSymbol();
}
// Operators may be tagged with associativity rules. Precedence is
// implied in the order. This is subsequently used in static conflict
// resolution.
while (symbol() == LEFTASSOCMARK || symbol() == RIGHTASSOCMARK ||
symbol() == NOASSOCMARK) {
int associativity;
switch (symbol()) {
case LEFTASSOCMARK: associativity = ASSOC_LEFT;
break;
case RIGHTASSOCMARK: associativity = ASSOC_RIGHT;
break;
case NOASSOCMARK: associativity = ASSOC_NONE;
break;
}
precedence++;
nextSymbol();
while (symbol() == IDENTIFIER) {
if (!(term = ParRule::ruleSet.find(spelling())))
term = new ParRule(spelling());
term->defTerm();
term->precedence = precedence;
term->associativity = associativity;
nextSymbol();
}
}
if (symbol() == PRELUDEMARK) {
prelude = scanCode();
nextSymbol();
}
if (symbol() == INHMARK) {
inhAttr = scanCode();
nextSymbol();
}
expect(RULESMARK);
while (symbol() == IDENTIFIER)
rule();
if (symbol() == POSTLUDEMARK) {
postlude = scanCode();
nextSymbol();
}
}
// Each non-terminal rule may allow specify synthesized attributes and
// evaluation functions. These are denoted by attribute and eval
// markers respectively.
void ParGenParser::rule()
{
ParRule *head;
if (head = ParRule::ruleSet.find(spelling())) {
// The rule is rearranged so that it occurs in the order of definition.
// Subsequently, this order resolves reduce-reduce parse conflicts.
ParRule::ruleSet.remove(head);
ParRule::ruleSet.append(head);
} else {
head = new ParRule(spelling());
}
nextSymbol();
if (symbol() == ATTRIBUTEMARK) {
head->attributes = scanCode();
nextSymbol();
}
expect(EQUAL);
head->defn(expression());
expect(PERIOD);
}
// The member functions termSeq() and expression() contain code to
// construct the appropriate internal representation for subsequent
// conversion to configurations.
// Rule names and alternatives have their respective representation:
// ParNonTermNode and ParSelectNode. A unique ParEndNode is used to
// represent the configuration which denotes the recognition of a rule
// at the end of the body. Semantic action markers are denoted by a
// ParActionNode.
ParNode *ParGenParser::termSeq()
{
int ending = FALSE;
ParNode *expr, *tail, *head = 0;
do {
/*
* An identifier term returns a ParNonTermNode with a reference
* to the original definition. However, names may be forward
* defined. In this case, a stub definition is inserted and its
* body will be elaborated when a definition is ultimately
* encountered.
*/
if (symbol() == IDENTIFIER) {
ParRule *rule;
if (!(rule = ParRule::ruleSet.find(spelling())))
rule = new ParRule(spelling());
expr = new ParNonTermNode(rule);
nextSymbol();
// Since component nodes are referred to by name rather than position,
// an alias mechanism allows for nodes of the same syntactic category
// belong in the rule body to be distinguished.
if (symbol() == ALIASMARK) {
nextSymbol();
if (symbol() == IDENTIFIER) {
expr->aliasIs(spelling());
nextSymbol();
}
}
// The generated bottom-up parser allows for dynamic conflict resolution
// whereby constituents are reduced only if an optional predicate
// evaluates to true. This is specified by the conditional mark.
} else if (symbol() == CONDMARK) {
char *cond = scanCode();
nextSymbol();
if (symbol() == ACTIONMARK) {
char *code = scanCode();
int posn = position();
expr = new ParEndNode(code, cond);
nextSymbol();
} else
expr = new ParEndNode(0, cond);
ending = TRUE;
} else if (symbol() == ACTIONMARK) {
char *code = scanCode();
int posn = position();
nextSymbol();
if (symbol() == PERIOD || symbol() == ALTERNATE) {
expr = new ParEndNode(code);
ending = TRUE;
} else
expr = new ParActionNode(code);
} else if (symbol() == PERIOD || symbol() == ALTERNATE) {
expr = new ParEndNode("/*-stump-*/"); // fake action code
ending = TRUE;
} else {
expr = 0;
parseError("term expected");
ending = TRUE;
nextSymbol();
}
// The new term is accumulated in a suitable list representation.
if (head == 0)
head = expr;
else
tail->nextIs(expr);
tail = expr;
} while (!ending);
return head;
}
ParNode *ParGenParser::expression()
{
ParNode *expr;
expr = termSeq();
if (symbol() == Lexer::ALTERNATE) {
nextSymbol();
return(new ParSelectNode(expr, expression()));
}
return expr;
}
char *ParGenParser::prelude = 0;
char *ParGenParser::postlude = 0;
char *ParGenParser::inhAttr = 0;
char *ParGenParser::currentfile = 0;
syntax highlighted by Code2HTML, v. 0.9.1