#include "global.h" #include "source.h" #include "lex.h" #include "list.h" #include "nodes.h" #include "parser.h" #include "parparse.h" // The ParGen parser expects a specification as follows and builds an // internal representation for subsequent processing. // // specification ::= '%tokens' { IDENTIFIER } // { ('%left' | '%right' | '%noassoc') // { IDENTIFIER , ',' } } // [ '%prelude' code '%%' ] // [ '%inh' code '%%' ] // '%rules' { rule } // [ '%postlude' code '%%' ]. // rule ::= ruleHead [ '%attribute' code '%%' ] // '=' ruleBody '.'. // ruleHead ::= identifier. // ruleBody ::= expression. // expression ::= termSeq [ '|' expression ]. // termSeq ::= term [ termSeq ]. // term ::= identifier. // The member functions specification() and rule() have structures // consistent with that derived from the language grammar. // The prelude and postlude markers allow for C++ code to be // prepended and appended to the generated code. This is useful // for libraries on which the semantic action code depends on. void ParGenParser::specification() { int precedence = 0; ParRule *term; (new ParRule(SYSEOF))->defTerm(); expect(TOKENMARK); while (symbol() == IDENTIFIER) { if (!(term = ParRule::ruleSet.find(spelling()))) term = new ParRule(spelling()); term->defTerm(); nextSymbol(); } // Operators may be tagged with associativity rules. Precedence is // implied in the order. This is subsequently used in static conflict // resolution. while (symbol() == LEFTASSOCMARK || symbol() == RIGHTASSOCMARK || symbol() == NOASSOCMARK) { int associativity; switch (symbol()) { case LEFTASSOCMARK: associativity = ASSOC_LEFT; break; case RIGHTASSOCMARK: associativity = ASSOC_RIGHT; break; case NOASSOCMARK: associativity = ASSOC_NONE; break; } precedence++; nextSymbol(); while (symbol() == IDENTIFIER) { if (!(term = ParRule::ruleSet.find(spelling()))) term = new ParRule(spelling()); term->defTerm(); term->precedence = precedence; term->associativity = associativity; nextSymbol(); } } if (symbol() == PRELUDEMARK) { prelude = scanCode(); nextSymbol(); } if (symbol() == INHMARK) { inhAttr = scanCode(); nextSymbol(); } expect(RULESMARK); while (symbol() == IDENTIFIER) rule(); if (symbol() == POSTLUDEMARK) { postlude = scanCode(); nextSymbol(); } } // Each non-terminal rule may allow specify synthesized attributes and // evaluation functions. These are denoted by attribute and eval // markers respectively. void ParGenParser::rule() { ParRule *head; if (head = ParRule::ruleSet.find(spelling())) { // The rule is rearranged so that it occurs in the order of definition. // Subsequently, this order resolves reduce-reduce parse conflicts. ParRule::ruleSet.remove(head); ParRule::ruleSet.append(head); } else { head = new ParRule(spelling()); } nextSymbol(); if (symbol() == ATTRIBUTEMARK) { head->attributes = scanCode(); nextSymbol(); } expect(EQUAL); head->defn(expression()); expect(PERIOD); } // The member functions termSeq() and expression() contain code to // construct the appropriate internal representation for subsequent // conversion to configurations. // Rule names and alternatives have their respective representation: // ParNonTermNode and ParSelectNode. A unique ParEndNode is used to // represent the configuration which denotes the recognition of a rule // at the end of the body. Semantic action markers are denoted by a // ParActionNode. ParNode *ParGenParser::termSeq() { int ending = FALSE; ParNode *expr, *tail, *head = 0; do { /* * An identifier term returns a ParNonTermNode with a reference * to the original definition. However, names may be forward * defined. In this case, a stub definition is inserted and its * body will be elaborated when a definition is ultimately * encountered. */ if (symbol() == IDENTIFIER) { ParRule *rule; if (!(rule = ParRule::ruleSet.find(spelling()))) rule = new ParRule(spelling()); expr = new ParNonTermNode(rule); nextSymbol(); // Since component nodes are referred to by name rather than position, // an alias mechanism allows for nodes of the same syntactic category // belong in the rule body to be distinguished. if (symbol() == ALIASMARK) { nextSymbol(); if (symbol() == IDENTIFIER) { expr->aliasIs(spelling()); nextSymbol(); } } // The generated bottom-up parser allows for dynamic conflict resolution // whereby constituents are reduced only if an optional predicate // evaluates to true. This is specified by the conditional mark. } else if (symbol() == CONDMARK) { char *cond = scanCode(); nextSymbol(); if (symbol() == ACTIONMARK) { char *code = scanCode(); int posn = position(); expr = new ParEndNode(code, cond); nextSymbol(); } else expr = new ParEndNode(0, cond); ending = TRUE; } else if (symbol() == ACTIONMARK) { char *code = scanCode(); int posn = position(); nextSymbol(); if (symbol() == PERIOD || symbol() == ALTERNATE) { expr = new ParEndNode(code); ending = TRUE; } else expr = new ParActionNode(code); } else if (symbol() == PERIOD || symbol() == ALTERNATE) { expr = new ParEndNode("/*-stump-*/"); // fake action code ending = TRUE; } else { expr = 0; parseError("term expected"); ending = TRUE; nextSymbol(); } // The new term is accumulated in a suitable list representation. if (head == 0) head = expr; else tail->nextIs(expr); tail = expr; } while (!ending); return head; } ParNode *ParGenParser::expression() { ParNode *expr; expr = termSeq(); if (symbol() == Lexer::ALTERNATE) { nextSymbol(); return(new ParSelectNode(expr, expression())); } return expr; } char *ParGenParser::prelude = 0; char *ParGenParser::postlude = 0; char *ParGenParser::inhAttr = 0; char *ParGenParser::currentfile = 0;