// As a boot-strap, ParGen uses a hand-written lexer and parser. For // this reason, the specification language designed to be as simple as // possible so as to ultimately allow for a simple implementation. // The lexer implementation is found the lexer class, and it derives its // source handling functionality from the Source base class. class Lexer : public Source { public: // Symbols to be recognised in the input specification are enumerated as // such: enum symbols { IDENTIFIER, CHARCONST, LEFTBRACE, RIGHTBRACE, LEFTBRACKET, RIGHTBRACKET, LEFTPARENT, RIGHTPARENT, EQUAL, ALTERNATE, COMMA, RANGE, PERIOD, PLUS, MINUS, STAR, DIV, RULESMARK, TOKENMARK, INHMARK, LEFTASSOCMARK, RIGHTASSOCMARK, NOASSOCMARK, ALIASMARK, ACTIONMARK, CONDMARK, ATTRIBUTEMARK, PRELUDEMARK, POSTLUDEMARK, EOFTOKEN, UNKNOWN }; public: Lexer(char *); symbols nextSymbol() { return(_symbol = scan()); } symbols symbol() { return _symbol; } // The functions position() and spelling() give access to values // contained in tokenPosition and _spelling respectively. int position() { return tokenPosition; } char *spelling() { return _spelling; } // The function scanCode() copies code fragments from the input file and // will be reviewed subsequently. char *scanCode(); private: // The member function scan() obtains the next token from the input // stream. It relies on function skipSpaces() to ignore whitespaces, // and function getSpelling() to read a alpha-numeric identifier into // the _spelling character buffer. symbols scan(); void skipSpaces(); void getSpelling(const char *prefix = ""); bool identifyChar(symbols &val); bool identifyMarker(symbols &val); symbols char2id(char c); public: // For simplicity, a fixed location is allocated for the spelling of // scanned tokens. As such, an upper limit is also placed on token // length. enum { MAXSPELLING = 40 }; // For convenient error reporting, symbolSpelling[] holds the // representation of each symbol above. static char *symbolSpelling[]; private: char _spelling[MAXSPELLING+1]; // The member function scan() records the column position of the current // token in the instance variable tokenPosition. int tokenPosition; // Any leftover character to be used as lookahead for the next scan is // placed in lookahead. char lookahead; // The public member function nextSymbol() relies on scan() to obtain // the next token, and leaves the token value in _symbol as a lookahead // token. symbols _symbol; };