#include "global.h" #include "source.h" #include "lex.h" #include "list.h" #include "nodes.h" #include "conf.h" // A parser is made via the static function makeParser(). void ParRule::makeParser() { listIter forEach(&ruleSet); ParRule **rule; // Before the actual parser generation, consistency checks are performed // on grammar rules. // As forward declarations result in a rule stub, the consistency check // first ensures that each non-terminal has a definition. (Terminal // symbols are already marked as having a definition.) while (rule = forEach()) if (!(*rule)->hasDefinition()) error(ERROR, "'%s' is not defined\n", (*rule)->name()); // We also confirm that a root non-terminal exists. ParRule *root; forEach.init(&ruleSet); while (rule = forEach()) if ((*rule)->isNonTerm()) { root = *rule; break; } if (errorsFound() || !root) error(ABORT, "Errors encountered in BNF specification\n"); // From the root non-terminal, the super root non-terminal rule is added // with the special EOF terminal symbol. systemRoot = new ParRule(SYSROOT); ParNode* body = new ParNonTermNode(root); body->nextIs(new ParNonTermNode(ParRule::ruleSet.find(SYSEOF))); body->next()->nextIs(new ParEndNode()); systemRoot->defn(body); // Without dynamic conditional rules, reduce-reduce conflicts are // resolved by looking up the order of rules, and using the earlier // rule. The representation is ordered to reflect the ordering of rule // definition. int order; ParRule::systemRoot->rhs()->order(order); forEach.init(&ruleSet); while (rule = forEach()) if ((*rule)->isNonTerm() && (*rule)->id() < ParRule::systemRoot->id()) (*rule)->rhs()->order(order); // The maybeEmpty and startersets attributes for each rule are // precomputed for efficient usage in the parser generation process. // Since the computation depended on similar attributes in other rules, // the process was repeated until a stable equilibrium was reached. int changed; do { changed = FALSE; forEach.init(&ruleSet); while (rule = forEach()) { int answer = ((*rule)->isNonTerm() ? (*rule)->rhs()->maybeEmpty() : FALSE); if ((*rule)->maybeEmpty != answer) { changed = TRUE; (*rule)->maybeEmpty = answer; } } } while (changed); do { changed = FALSE; forEach.init(&ruleSet); while (rule = forEach()) if ((*rule)->isNonTerm()) { list sym; int originalLength = (*rule)->starters.length(); (*rule)->rhs()->starters(&sym); (*rule)->starters.setUnion(sym); if (originalLength != (*rule)->starters.length()) changed = TRUE; } } while (changed); // Instead of generating LALR(1) configurations by merging LR(1) states, // an optimised approach is used by generating LR(0) configurations and // then subsequently adding lookahead sets. // The LR(0) complete configuration graph corresponding to a parser is // generated by creating an initial configuration set as represented by // LConfList. This gives the first configuration set, from which moves // are recursively generated, with finding configurations of destination // states. LConfList *firstClosure = new LConfList(); systemRoot->rhs()->findClosure(systemRoot, firstClosure); ConfSet *initial = new ConfSet(firstClosure); initial->findMoves(); // With the LR(0) configurations completed, the lookahead sets are // constructed. // The generation of LR(0) configurations relied on having only one // LConf instance for each unique configuration. When lookahead sets // are added, LConfs occuring in different contexts must be // distinguished due to different lookaheads. As such, all LConfs are // replicated to ensure that each instance is unique. listIter forEachConfigSet(&ConfSet::confSets); ConfSet **cs; while (cs = forEachConfigSet()) (*cs)->closure->replicate(); // As with maybeEmpty and startersets above, because the lookahead sets // depend on each other, the propagation continues until an equilibrium // is reached, as indicated by the flag changed. do { listIter forEachConfigSet(&ConfSet::confSets); ConfSet **cs; changed = FALSE; while (cs = forEachConfigSet()) { listIter forEachLConf((*cs)->closure); LConf **c1; // Spontaneous lookaheads of the following form are first propagated: // A -> B . C D (c1) // C -> . E F G (c2) // When the configuration c1 and c2 belong to a configuration set, // the starters of the portion starting with D be added to the // C configuration. while (c1 = forEachLConf()) if ((*c1)->conf->dot->next() && (*c1)->conf->dot->nontermRule()->isNonTerm()) { listIter forEach((*cs)->closure); LConf **c2; while (c2 = forEach()) if ((*c1)->conf->dot->nontermRule() == (*c2)->conf->rule) { list sym; int x = (*c2)->lookAhead.length(); (*c1)->conf->dot->next()->starters(&sym); (*c2)->lookAhead.setUnion(sym); // If the end of rule is found, the lookahead set itself is propagated. if ((*c1)->conf->dot->next()->maybeEmpty()) (*c2)->lookAhead.setUnion((*c1)->lookAhead); if ((*c2)->lookAhead.length() != x) changed = TRUE; } } // Lookaheads are also propagated to the destination configuration. The // lookahead of a configuration A -> X . Y Z may be copied to the // configuration A -> X Y . Z in the destintation configuration set. forEachLConf.init((*cs)->closure); while (c1 = forEachLConf()) if ((*c1)->conf->dot->next()) { ConfSet *dest = (*cs)->destOf((*c1)->conf->dot->nontermRule()); listIter forEach(dest->closure); LConf **c2; while (c2 = forEach()) if ((*c1)->conf->dot->next() == (*c2)->conf->dot) { int x = (*c2)->lookAhead.length(); (*c2)->lookAhead.setUnion((*c1)->lookAhead); if ((*c2)->lookAhead.length() != x) changed = TRUE; } } } } while (changed); } // The member function find() finds a target rule in the rule list. ParRule *ParRuleList::find(char *target) { listIter forEach(this); ParRule **item; while (item = forEach()) if (strcmp((*item)->name(), target) == 0) return(*item); return 0; } // The following show() member functions are for reporting purposes. void ParRule::show(FILE *fp) { fprintf(fp, "%s =", name()); if (isNonTerm()) { if (rhs()) rhs()->show(0, fp); } else fprintf(fp, " %d(precedence)", precedence); fprintf(fp, ".\n"); } void ParNonTermNode::show(ParNode *dot, FILE *fp) { if (dot == this) printf(" *"); fprintf(fp, " %s", nonterm()->name()); if (next()) next()->show(dot, fp); } void ParSelectNode::show(ParNode *dot, FILE *fp) { alt1()->show(dot, fp); fprintf(fp, " |"); alt2()->show(dot, fp); } void ParActionNode::show(ParNode *dot, FILE *fp) { if (dot == this) fprintf(fp, " *"); if (code()) fprintf(fp, " %%action %s %%%%", code()); if (next()) next()->show(dot, fp); } void ParEndNode::show(ParNode *dot, FILE *fp) { if (dot == this) fprintf(fp, " *"); } // The following leadsTo() member functions return themselves if they // form a rule prefix to the given target. ParNode *ParNode::leadsTo(ParNode *target) { return(this == target || next()->leadsTo(target) ? this : 0); } ParNode *ParSelectNode::leadsTo(ParNode *target) { ParNode *p; return((p = alt1()->leadsTo(target)) ? p : ((p = alt2()->leadsTo(target)) ? p : 0)); } ParNode *ParEndNode::leadsTo(ParNode *target) { return(this == target ? target : 0); } int ParNonTermNode::maybeEmpty() { if (nonterm()->isNonTerm()) return(nonterm()->maybeEmpty && next()->maybeEmpty()); else return FALSE; } void ParNonTermNode::starters(list *sym) { if (nonterm()->isNonTerm()) { sym->setUnion(nonterm()->starters); if (nonterm()->maybeEmpty) next()->starters(sym); } else sym->append(nonterm()); } ParRule *ParActionNode::nontermRule() { if (!genName) { static int i = 1; char name[20]; sprintf(name, "inherited%d", i++); genName = new InhMarker(name, this); genName->defn(new ParEndNode(code())); } return genName; } int ParNonTermNode::precedence() { int x = next()->precedence(); return(nonterm()->isNonTerm() ? x : (x ? x : nonterm()->precedence)); } int ParNonTermNode::associativity() { int x = next()->associativity(); return(nonterm()->isNonTerm() ? x : (x ? x : nonterm()->associativity)); } int ParRule::RuleId = 1; ParRuleList ParRule::ruleSet; ParRule *ParRule::systemRoot;