// As a bootstrap, ParGen uses a hand-written lexer and parser. For
// this reason, the specification language is designed to be as simple as
// possible so as to ultimately allow for a simple implementation.
// The lexer implementation is found in the Lexer class, which derives its
// source-handling functionality from the Source base class.
// Hand-written lexer for the input specification. Source-handling
// functionality is inherited from the Source base class; this class adds
// token recognition on top of it.
class Lexer : public Source {
public:
    // Symbols to be recognised in the input specification are enumerated
    // as such:
    enum symbols { IDENTIFIER, CHARCONST, LEFTBRACE, RIGHTBRACE,
                   LEFTBRACKET, RIGHTBRACKET, LEFTPARENT, RIGHTPARENT,
                   EQUAL, ALTERNATE, COMMA, RANGE, PERIOD,
                   PLUS, MINUS, STAR, DIV,
                   RULESMARK, TOKENMARK, INHMARK,
                   LEFTASSOCMARK, RIGHTASSOCMARK, NOASSOCMARK,
                   ALIASMARK, ACTIONMARK, CONDMARK,
                   ATTRIBUTEMARK, PRELUDEMARK, POSTLUDEMARK,
                   EOFTOKEN, UNKNOWN };

    // For simplicity, a fixed location is allocated for the spelling of
    // scanned tokens. As such, an upper limit is also placed on token
    // length.
    enum { MAXSPELLING = 40 };

    // For convenient error reporting, symbolSpelling[] holds the
    // representation of each symbol above.
    // NOTE(review): presumably indexed by a `symbols` value; the table is
    // defined elsewhere -- confirm against its definition.
    static char *symbolSpelling[];

    // NOTE(review): the meaning of the argument (presumably the name of the
    // input file) is not visible here -- confirm against the definition.
    // The signature is left as-is because the definition lives elsewhere.
    Lexer(char *);

    // Scans the next token, remembers it as the current lookahead token,
    // and returns it.
    symbols nextSymbol() {
        _symbol = scan();
        return _symbol;
    }

    // Returns the token most recently produced by nextSymbol() without
    // consuming any further input.
    symbols symbol() const { return _symbol; }

    // The functions position() and spelling() give access to the values
    // contained in tokenPosition and _spelling respectively. The const
    // overload of spelling() allows read-only access through a const Lexer.
    int position() const { return tokenPosition; }
    char *spelling() { return _spelling; }
    const char *spelling() const { return _spelling; }

    // The function scanCode() copies code fragments from the input file.
    // NOTE(review): ownership of the returned buffer is not visible here.
    char *scanCode();

private:
    // The member function scan() obtains the next token from the input
    // stream. It relies on function skipSpaces() to ignore whitespace,
    // and function getSpelling() to read an alphanumeric identifier into
    // the _spelling character buffer.
    symbols scan();
    void skipSpaces();
    void getSpelling(const char *prefix = "");

    // NOTE(review): the helpers below are defined elsewhere. Judging by
    // their names only, identifyChar()/identifyMarker() presumably try to
    // recognise a character constant / a marker keyword and report the
    // result through `val`, and char2id() presumably maps a single
    // character to its symbol -- confirm against the definitions.
    bool identifyChar(symbols &val);
    bool identifyMarker(symbols &val);
    symbols char2id(char c);

    // Fixed-size buffer for the spelling of the current token; one byte
    // beyond MAXSPELLING is reserved (presumably for the terminating NUL).
    char _spelling[MAXSPELLING+1];

    // The member function scan() records the column position of the
    // current token in the instance variable tokenPosition.
    int tokenPosition;

    // Any leftover character to be used as lookahead for the next scan is
    // placed in lookahead.
    char lookahead;

    // The public member function nextSymbol() relies on scan() to obtain
    // the next token, and leaves the token value in _symbol as a lookahead
    // token.
    symbols _symbol;
};
// syntax highlighted by Code2HTML, v. 0.9.1