// As a boot-strap, ParGen uses a hand-written lexer and parser.  For
// this reason, the specification language is designed to be as simple as
// possible so as to ultimately allow for a simple implementation.

// The lexer implementation is found in the Lexer class, which derives its
// source handling functionality from the Source base class.

class Lexer : public Source {
	public:
		// Symbols to be recognised in the input specification are enumerated
		// below.  NOTE(review): symbolSpelling[] is presumably indexed by
		// these enumerators, so their order should not be changed without
		// checking that table's definition.

		enum symbols { IDENTIFIER, CHARCONST, LEFTBRACE, RIGHTBRACE,
			LEFTBRACKET, RIGHTBRACKET, LEFTPARENT, RIGHTPARENT,
			EQUAL, ALTERNATE, COMMA, RANGE, PERIOD, 
			PLUS, MINUS, STAR, DIV,
			RULESMARK, TOKENMARK, INHMARK,
			LEFTASSOCMARK, RIGHTASSOCMARK, NOASSOCMARK,
			ALIASMARK, ACTIONMARK, CONDMARK,
			ATTRIBUTEMARK, PRELUDEMARK, POSTLUDEMARK,
			EOFTOKEN, UNKNOWN };

	public:
		// The constructor is defined elsewhere.  NOTE(review): its char *
		// argument presumably names the input specification to be read --
		// confirm against the definition.

		Lexer(char *);

		// nextSymbol() advances the lexer: it calls scan(), stores the
		// result in _symbol and returns it.  symbol() returns the stored
		// value again without scanning, and is const since it only reads
		// state.

		symbols nextSymbol() { return(_symbol = scan()); }
		symbols symbol() const { return _symbol; }

		// The functions position() and spelling() give access to values
		// contained in tokenPosition and _spelling respectively.
		// position() only reads state and is therefore const.  spelling()
		// hands out a mutable pointer to the lexer's own buffer and so is
		// left non-const; the pointer refers to storage inside this object
		// rather than to a copy.

		int position() const { return tokenPosition; }
		char *spelling() { return _spelling; }

		// The function scanCode() copies code fragments from the input file and
		// will be reviewed subsequently.

		char *scanCode();

	private:

		// The member function scan() obtains the next token from the input
		// stream.  It relies on function skipSpaces() to ignore whitespace,
		// and function getSpelling() to read an alpha-numeric identifier into
		// the _spelling character buffer.
		//
		// NOTE(review): identifyChar(), identifyMarker() and char2id() are
		// only declared here; judging by their names they classify
		// single-character tokens and section markers -- confirm against
		// their definitions.

		symbols scan();
		void skipSpaces();
		void getSpelling(const char *prefix = "");
		bool identifyChar(symbols &val);
		bool identifyMarker(symbols &val);
		symbols char2id(char c);

	public:
		// For simplicity, a fixed location is allocated for the spelling of
		// scanned tokens.  As such, an upper limit is also placed on token
		// length.

		enum { MAXSPELLING = 40 };

		// For convenient error reporting, symbolSpelling[] holds the
		// representation of each symbol above.

		static char *symbolSpelling[];

	private:
		// Storage for the current token's spelling: MAXSPELLING characters
		// plus one extra slot, presumably for the terminating NUL.

		char _spelling[MAXSPELLING+1];

		// The member function scan() records the column position of the current
		// token in the instance variable tokenPosition.

		int tokenPosition;

		// Any leftover character to be used as lookahead for the next scan is
		// placed in lookahead.

		char lookahead;

		// The public member function nextSymbol() relies on scan() to obtain
		// the next token, and leaves the token value in _symbol as a lookahead
		// token.

		symbols _symbol;
};


// syntax highlighted by Code2HTML, v. 0.9.1