// As a bootstrap, LexGen uses a hand-written lexer and parser.  For
// this reason, the specification language is designed to be as simple
// as possible, so that the implementation can remain simple too.

// The lexer implementation is found in the Lexer class, which derives
// its source-handling functionality from the Source base class.

// Lexer: hand-written scanner for LexGen specification files.  It turns
// the input supplied through the Source base class into a stream of
// symbols, keeping one symbol of lookahead for its client.

class Lexer : public Source {
public:

// For simplicity, a fixed-size buffer holds the spelling of the most
// recently scanned token, which places an upper limit of MAXSPELLING
// characters on token length.

	enum { MAXSPELLING = 40 };

// The symbols that can be recognised in the input specification.
// EOFTOKEN and UNKNOWN are the last two enumerators.

	enum symbols { IDENTIFIER, CHARCONST, LEFTBRACE, RIGHTBRACE,
			LEFTBRACKET, RIGHTBRACKET, LEFTPARENT, RIGHTPARENT,
			EQUAL, ALTERNATE, COMMA, RANGE, PERIOD, PLUS, STAR,
			IGNOREMARK, TOKENMARK,
			PRELUDEMARK, POSTLUDEMARK, FILTERMARK, 
			SQUOTE, DQUOTE,
			EOFTOKEN, UNKNOWN };

// For convenient error reporting, symbolSpelling[] holds a printable
// representation of each symbol above.
// NOTE(review): presumably indexed by the symbols enumerator value --
// confirm against the definition of the table.

	static char *symbolSpelling[];
private:

// scan() obtains the next token from the input stream.  It relies on
// skipSpaces() to ignore whitespace, and on getSpelling() to read an
// alphanumeric identifier into the _spelling character buffer.
// NOTE(review): the extra element in _spelling is presumably reserved
// for a terminating NUL -- confirm in getSpelling().

	symbols scan();
	void skipSpaces();
	void getSpelling();
	char _spelling[MAXSPELLING+1];

// scan() records the column position of the current token in
// tokenPosition.

	int tokenPosition;

// Any leftover character to be used as lookahead for the next scan is
// kept in lookahead.

	char lookahead;

// The most recently scanned token; it serves as the lookahead token
// for clients of the lexer.

	symbols _symbol;

public:

// nextSymbol() scans the next token, stores it in _symbol and returns
// it.  symbol() returns the stored token again without scanning; it
// does not modify the lexer and is therefore const-qualified.

	symbols nextSymbol() { return(_symbol = scan()); }
	symbols symbol() const { return _symbol; }

// position() and spelling() give access to the values held in
// tokenPosition and _spelling respectively.  position() is a pure
// query and is const-qualified; spelling() keeps its original mutable
// form and gains a read-only overload so that the spelling can also be
// inspected through a const Lexer.

	int position() const { return tokenPosition; }
	char *spelling() { return _spelling; }
	const char *spelling() const { return _spelling; }

// scanCode() copies code fragments from the input file.
// NOTE(review): ownership and lifetime of the returned buffer are not
// visible from this declaration -- confirm against the definition.

	char *scanCode();

// NOTE(review): the argument is presumably the name of the
// specification file to read -- confirm against the constructor
// definition and the Source base class.

	Lexer(const char *);
};


// syntax highlighted by Code2HTML, v. 0.9.1