#include "global.h"
#include "source.h"
#include "lex.h"
#include "list.h"
#include "nodes.h"
#include "conf.h"
// A parser is made via the static function makeParser().
void ParRule::makeParser()
{
listIter<ParRule *> forEach(&ruleSet);
ParRule **rule;
// Before the actual parser generation, consistency checks are performed
// on grammar rules.
// As forward declarations result in a rule stub, the consistency check
// first ensures that each non-terminal has a definition. (Terminal
// symbols are already marked as having a definition.)
while (rule = forEach())
if (!(*rule)->hasDefinition())
error(ERROR, "'%s' is not defined\n", (*rule)->name());
// We also confirm that a root non-terminal exists.
ParRule *root;
forEach.init(&ruleSet);
while (rule = forEach())
if ((*rule)->isNonTerm()) {
root = *rule;
break;
}
if (errorsFound() || !root)
error(ABORT, "Errors encountered in BNF specification\n");
// From the root non-terminal, the super root non-terminal rule is added
// with the special EOF terminal symbol.
systemRoot = new ParRule(SYSROOT);
ParNode* body = new ParNonTermNode(root);
body->nextIs(new ParNonTermNode(ParRule::ruleSet.find(SYSEOF)));
body->next()->nextIs(new ParEndNode());
systemRoot->defn(body);
// Without dynamic conditional rules, reduce-reduce conflicts are
// resolved by looking up the order of rules, and using the earlier
// rule. The representation is ordered to reflect the ordering of rule
// definition.
int order;
ParRule::systemRoot->rhs()->order(order);
forEach.init(&ruleSet);
while (rule = forEach())
if ((*rule)->isNonTerm() &&
(*rule)->id() < ParRule::systemRoot->id())
(*rule)->rhs()->order(order);
// The maybeEmpty and startersets attributes for each rule are
// precomputed for efficient usage in the parser generation process.
// Since the computation depended on similar attributes in other rules,
// the process was repeated until a stable equilibrium was reached.
int changed;
do {
changed = FALSE;
forEach.init(&ruleSet);
while (rule = forEach()) {
int answer = ((*rule)->isNonTerm() ?
(*rule)->rhs()->maybeEmpty() :
FALSE);
if ((*rule)->maybeEmpty != answer) {
changed = TRUE;
(*rule)->maybeEmpty = answer;
}
}
} while (changed);
do {
changed = FALSE;
forEach.init(&ruleSet);
while (rule = forEach())
if ((*rule)->isNonTerm()) {
list<ParRule *> sym;
int originalLength = (*rule)->starters.length();
(*rule)->rhs()->starters(&sym);
(*rule)->starters.setUnion(sym);
if (originalLength != (*rule)->starters.length())
changed = TRUE;
}
} while (changed);
// Instead of generating LALR(1) configurations by merging LR(1) states,
// an optimised approach is used by generating LR(0) configurations and
// then subsequently adding lookahead sets.
// The LR(0) complete configuration graph corresponding to a parser is
// generated by creating an initial configuration set as represented by
// LConfList. This gives the first configuration set, from which moves
// are recursively generated, with finding configurations of destination
// states.
LConfList *firstClosure = new LConfList();
systemRoot->rhs()->findClosure(systemRoot, firstClosure);
ConfSet *initial = new ConfSet(firstClosure);
initial->findMoves();
// With the LR(0) configurations completed, the lookahead sets are
// constructed.
// The generation of LR(0) configurations relied on having only one
// LConf instance for each unique configuration. When lookahead sets
// are added, LConfs occuring in different contexts must be
// distinguished due to different lookaheads. As such, all LConfs are
// replicated to ensure that each instance is unique.
listIter<ConfSet *> forEachConfigSet(&ConfSet::confSets);
ConfSet **cs;
while (cs = forEachConfigSet())
(*cs)->closure->replicate();
// As with maybeEmpty and startersets above, because the lookahead sets
// depend on each other, the propagation continues until an equilibrium
// is reached, as indicated by the flag changed.
do {
listIter<ConfSet *> forEachConfigSet(&ConfSet::confSets);
ConfSet **cs;
changed = FALSE;
while (cs = forEachConfigSet()) {
listIter<LConf *> forEachLConf((*cs)->closure);
LConf **c1;
// Spontaneous lookaheads of the following form are first propagated:
// A -> B . C D (c1)
// C -> . E F G (c2)
// When the configuration c1 and c2 belong to a configuration set,
// the starters of the portion starting with D be added to the
// C configuration.
while (c1 = forEachLConf())
if ((*c1)->conf->dot->next() &&
(*c1)->conf->dot->nontermRule()->isNonTerm()) {
listIter<LConf *> forEach((*cs)->closure);
LConf **c2;
while (c2 = forEach())
if ((*c1)->conf->dot->nontermRule() ==
(*c2)->conf->rule) {
list<ParRule *> sym;
int x = (*c2)->lookAhead.length();
(*c1)->conf->dot->next()->starters(&sym);
(*c2)->lookAhead.setUnion(sym);
// If the end of rule is found, the lookahead set itself is propagated.
if ((*c1)->conf->dot->next()->maybeEmpty())
(*c2)->lookAhead.setUnion((*c1)->lookAhead);
if ((*c2)->lookAhead.length() != x)
changed = TRUE;
}
}
// Lookaheads are also propagated to the destination configuration. The
// lookahead of a configuration A -> X . Y Z may be copied to the
// configuration A -> X Y . Z in the destintation configuration set.
forEachLConf.init((*cs)->closure);
while (c1 = forEachLConf())
if ((*c1)->conf->dot->next()) {
ConfSet *dest =
(*cs)->destOf((*c1)->conf->dot->nontermRule());
listIter<LConf *> forEach(dest->closure);
LConf **c2;
while (c2 = forEach())
if ((*c1)->conf->dot->next() == (*c2)->conf->dot) {
int x = (*c2)->lookAhead.length();
(*c2)->lookAhead.setUnion((*c1)->lookAhead);
if ((*c2)->lookAhead.length() != x)
changed = TRUE;
}
}
}
} while (changed);
}
// The member function find() finds a target rule in the rule list.
ParRule *ParRuleList::find(char *target)
{
listIter<ParRule *> forEach(this);
ParRule **item;
while (item = forEach())
if (strcmp((*item)->name(), target) == 0)
return(*item);
return 0;
}
// The following show() member functions are for reporting purposes.
void ParRule::show(FILE *fp)
{
fprintf(fp, "%s =", name());
if (isNonTerm()) {
if (rhs())
rhs()->show(0, fp);
}
else fprintf(fp, " %d(precedence)", precedence);
fprintf(fp, ".\n");
}
void ParNonTermNode::show(ParNode *dot, FILE *fp)
{
if (dot == this)
printf(" *");
fprintf(fp, " %s", nonterm()->name());
if (next())
next()->show(dot, fp);
}
void ParSelectNode::show(ParNode *dot, FILE *fp)
{
alt1()->show(dot, fp);
fprintf(fp, " |");
alt2()->show(dot, fp);
}
void ParActionNode::show(ParNode *dot, FILE *fp)
{
if (dot == this)
fprintf(fp, " *");
if (code()) fprintf(fp, " %%action %s %%%%", code());
if (next())
next()->show(dot, fp);
}
void ParEndNode::show(ParNode *dot, FILE *fp)
{
if (dot == this)
fprintf(fp, " *");
}
// The following leadsTo() member functions return themselves if they
// form a rule prefix to the given target.
ParNode *ParNode::leadsTo(ParNode *target)
{
return(this == target || next()->leadsTo(target) ?
this :
0);
}
ParNode *ParSelectNode::leadsTo(ParNode *target)
{
ParNode *p;
return((p = alt1()->leadsTo(target)) ?
p :
((p = alt2()->leadsTo(target)) ? p : 0));
}
ParNode *ParEndNode::leadsTo(ParNode *target)
{
return(this == target ? target : 0);
}
int ParNonTermNode::maybeEmpty()
{
if (nonterm()->isNonTerm())
return(nonterm()->maybeEmpty && next()->maybeEmpty());
else
return FALSE;
}
void ParNonTermNode::starters(list<ParRule *> *sym)
{
if (nonterm()->isNonTerm()) {
sym->setUnion(nonterm()->starters);
if (nonterm()->maybeEmpty)
next()->starters(sym);
} else
sym->append(nonterm());
}
ParRule *ParActionNode::nontermRule()
{
if (!genName) {
static int i = 1;
char name[20];
sprintf(name, "inherited%d", i++);
genName = new InhMarker(name, this);
genName->defn(new ParEndNode(code()));
}
return genName;
}
int ParNonTermNode::precedence()
{
int x = next()->precedence();
return(nonterm()->isNonTerm() ? x : (x ? x : nonterm()->precedence));
}
int ParNonTermNode::associativity()
{
int x = next()->associativity();
return(nonterm()->isNonTerm() ? x : (x ? x : nonterm()->associativity));
}
int ParRule::RuleId = 1;
ParRuleList ParRule::ruleSet;
ParRule *ParRule::systemRoot;
syntax highlighted by Code2HTML, v. 0.9.1