// As a boot-strap, LexGen uses a hand-written lexer and parser. For // this reason, the specification language designed to be as simple as // possible so as to ultimately allow for a simple implementation. // The lexer implementation is found in the lexer class, and it derives its // source handling functionality from the Source base class. class Lexer : public Source { public: // For simplicity, a fixed location is allocated for the spelling of // scanned tokens. As such, an upper limit is also placed on token // length. enum { MAXSPELLING = 40 }; // Symbols to be recognised in the input specification are enumerated as // such: enum symbols { IDENTIFIER, CHARCONST, LEFTBRACE, RIGHTBRACE, LEFTBRACKET, RIGHTBRACKET, LEFTPARENT, RIGHTPARENT, EQUAL, ALTERNATE, COMMA, RANGE, PERIOD, PLUS, STAR, IGNOREMARK, TOKENMARK, PRELUDEMARK, POSTLUDEMARK, FILTERMARK, SQUOTE, DQUOTE, EOFTOKEN, UNKNOWN }; // For convenient error reporting, symbolSpelling[] holds the // representation of each symbol above. static char *symbolSpelling[]; private: // The member function scan() obtains the next token from the input // stream. It relies on function skipSpaces() to ignore whitespaces, // and function getSpelling() to read a alpha-numeric identifier into // the _spelling character buffer. symbols scan(); void skipSpaces(); void getSpelling(); char _spelling[MAXSPELLING+1]; // The member function scan() records the column position of the current // token in the instance variable tokenPosition. int tokenPosition; // Any leftover character to be used as lookahead for the next scan is // placed in lookahead. char lookahead; // The public member function nextSymbol() relies on scan() to obtain // the next token, and leaves the token value in _symbol as a lookahead // token. symbols _symbol; public: symbols nextSymbol() { return(_symbol = scan()); } symbols symbol() { return _symbol; } // The functions position() and spelling() give access to values // contained in tokenPosition and _spelling respectively. int position() { return tokenPosition; } char *spelling() { return _spelling; } // The function scanCode() copies code fragments from the input file and // will be reviewed subsequently. char *scanCode(); Lexer(const char *); };