// As a bootstrap, LexGen uses a hand-written lexer and parser. For
// this reason, the specification language is designed to be as simple as
// possible, so that it ultimately allows for a simple implementation.
// The lexer implementation is found in the Lexer class, which derives its
// source-handling functionality from the Source base class.
// Lexer for the LexGen specification language. Source handling is inherited
// from the Source base class; this class adds token scanning on top of it and
// keeps the most recently scanned token available as a one-token lookahead.
class Lexer : public Source {
public:
    // For simplicity, a fixed-size buffer is allocated for the spelling of
    // scanned tokens. As such, an upper limit is also placed on token length.
    enum { MAXSPELLING = 40 };

    // Symbols to be recognised in the input specification.
    enum symbols { IDENTIFIER, CHARCONST, LEFTBRACE, RIGHTBRACE,
                   LEFTBRACKET, RIGHTBRACKET, LEFTPARENT, RIGHTPARENT,
                   EQUAL, ALTERNATE, COMMA, RANGE, PERIOD, PLUS, STAR,
                   IGNOREMARK, TOKENMARK,
                   PRELUDEMARK, POSTLUDEMARK, FILTERMARK,
                   SQUOTE, DQUOTE,
                   EOFTOKEN, UNKNOWN };

    // For convenient error reporting, symbolSpelling[] holds the textual
    // representation of each symbol above.
    // NOTE(review): if the table is initialised from string literals it
    // should be `const char *`; that requires changing the out-of-class
    // definition as well, so the type is left untouched here.
    static char *symbolSpelling[];

private:
    // scan() obtains the next token from the input stream. It relies on
    // skipSpaces() to ignore whitespace and on getSpelling() to read an
    // alphanumeric identifier into the _spelling buffer.
    symbols scan();
    void skipSpaces();
    void getSpelling();

    // Spelling of the scanned token. One element more than MAXSPELLING is
    // reserved -- presumably for the terminating '\0'; confirm against
    // getSpelling().
    char _spelling[MAXSPELLING+1];

    // Column position of the current token, recorded by scan().
    int tokenPosition;

    // Any leftover character to be used as lookahead for the next scan.
    char lookahead;

    // Token most recently returned by scan(); stored by nextSymbol() so it
    // can serve as a lookahead token.
    symbols _symbol;

public:
    // Scans the next token, remembers it as the current symbol and returns it.
    symbols nextSymbol() { return _symbol = scan(); }

    // Returns the current symbol without scanning. Const-qualified so that
    // it can be called through a const Lexer.
    symbols symbol() const { return _symbol; }

    // Returns the column position recorded for the current token.
    int position() const { return tokenPosition; }

    // Gives access to the spelling buffer of the current token. The
    // non-const form is kept for existing callers; the const overload
    // allows read-only access through a const Lexer.
    char *spelling() { return _spelling; }
    const char *spelling() const { return _spelling; }

    // Copies code fragments from the input file (defined elsewhere).
    char *scanCode();

    // NOTE(review): the meaning of the argument is not visible here --
    // presumably the name of the specification file; confirm against the
    // definition. Left non-explicit so existing implicit conversions from
    // const char * keep compiling.
    Lexer(const char *);
};
// syntax highlighted by Code2HTML, v. 0.9.1