#include <string>

#include "global.h"
#include "source.h"
#include "lex.h"
// Printable spellings of the lexical symbols, used when composing error
// messages.
// NOTE(review): the entries are presumably indexed by Lexer::symbols values,
// so their order would have to match that enumeration -- confirm against
// lex.h.
// NOTE(review): scan() recognises the marks ignore, prelude, postlude, filter
// and tokens, whereas this table lists %sets, %ignore and %tokens -- verify
// that the table is still in step with the enumeration.
char *Lexer::symbolSpelling[] = {
    // Token classes that carry a spelling of their own.
    "<identifier>",
    "<charconst>",

    // Bracketing punctuation.
    "{", "}",
    "[", "]",
    "(", ")",

    // Operators and separators.
    "=", "|", ",",
    "..", ".",
    "+", "*",

    // Section marks.
    "%sets",
    "%ignore",
    "%tokens",

    // Quote characters.
    "'",
    "\"",

    // End of input and the catch-all for unrecognised input.
    "<eof>",
    "??"
};
// Scans the next token from the input and returns its symbol value.
//
// On return, tokenPosition holds the position (as given by charPosition())
// at which the token started. For most tokens _spelling[] holds the token
// text; for a character constant it holds the single character denoted.
// Whitespace and C/C++ style comments in front of the token are skipped.
// Input that cannot be recognised yields UNKNOWN.
Lexer::symbols Lexer::scan()
{
    // The member function scan() relies on two lookup tables for character
    // to token mappings. The charLookUp[] table maps single characters to
    // token values.
    static struct charLookUpType {
        int ch; symbols value;
    } charLookUp[] = {
        { '=', EQUAL },
        { ',', COMMA },
        { '|', ALTERNATE },
        { '{', LEFTBRACE },
        { '}', RIGHTBRACE },
        { '[', LEFTBRACKET },
        { ']', RIGHTBRACKET },
        { '(', LEFTPARENT },
        { ')', RIGHTPARENT },
        { '*', STAR },
        { '+', PLUS },
        { '\'', SQUOTE },
        { '"', DQUOTE },
        { EOF, EOFTOKEN }
    };
    static const int charLookUpNumbers =
        sizeof(charLookUp)/sizeof(struct charLookUpType);

    // The markLookUp[] table maps % strings to token values. The strings
    // are literals, hence the pointer to const.
    static struct markLookUpType {
        const char *m; symbols value;
    } markLookUp[] = {
        { "ignore", IGNOREMARK },
        { "prelude", PRELUDEMARK },
        { "postlude", POSTLUDEMARK },
        { "filter", FILTERMARK },
        { "tokens", TOKENMARK }
    };
    static const int markLookUpNumbers =
        sizeof(markLookUp)/sizeof(struct markLookUpType);

    // The processing for scan() first involves ignoring whitespace and
    // comments; the token position is noted once the start of a token has
    // been reached. Comment conventions follow that of C++ and C: input
    // between double slashes (//) till the end of the line, and input
    // between /* and */, is ignored. The loop repeats so that any number
    // of consecutive comments is skipped without recursion.
    for (;;) {
        skipSpaces();
        tokenPosition = charPosition();
        if (lookahead != '/')
            break;

        lookahead = nextChar();
        if (lookahead == '/') {
            // Line comment: stop at the newline or at end of input. The
            // newline itself is consumed by skipSpaces() on the next pass.
            while (lookahead != '\n' && lookahead != EOF)
                lookahead = nextChar();
        } else if (lookahead == '*') {
            // Block comment: slide a two character window over the input
            // until it reads "*/". The window is held in ints so that EOF
            // can be told apart from ordinary characters.
            int c1 = nextChar();
            int c2 = (c1 == EOF) ? EOF : nextChar();
            while (c2 != EOF && !(c1 == '*' && c2 == '/')) {
                c1 = c2;
                c2 = nextChar();
            }
            if (c2 == EOF) {
                error(ERROR, "unterminated comment\n");
                lookahead = EOF;
            } else {
                lookahead = nextChar();
            }
        } else {
            // A slash that does not open a comment is not a token of the
            // language. It has already been consumed, so report it under
            // its own spelling instead of dropping it silently.
            _spelling[0] = '/';
            _spelling[1] = '\0';
            return UNKNOWN;
        }
    }

    // If the starting character falls into the alphanumeric
    // category for identifiers, the function getSpelling() is
    // invoked to read it.
    if ((lookahead >= 'a' && lookahead <= 'z') ||
        (lookahead >= 'A' && lookahead <= 'Z') ||
        lookahead == '_') {
        getSpelling();
        return IDENTIFIER;
    }

    // Character constants are prefixed with the '$' literal or '#' ASCII
    // value markers. In the former case, the next character is merely
    // read, whereas in the latter case, a string to integer conversion is
    // necessary.
    if (lookahead == '$') {
        _spelling[0] = nextChar();
        _spelling[1] = '\0';
        lookahead = nextChar();
        return CHARCONST;
    }
    if (lookahead == '#') {
        // No range check is performed: the decimal value is stored into a
        // single element of _spelling[], and a '#' without digits yields 0.
        int value = 0;
        lookahead = nextChar();
        while (lookahead >= '0' && lookahead <= '9') {
            value = value*10 + lookahead - '0';
            lookahead = nextChar();
        }
        _spelling[0] = value;
        _spelling[1] = '\0';
        return CHARCONST;
    }

    // Single character tokens are recognised by scanning the
    // charLookUp[] table.
    for (int i = 0; i < charLookUpNumbers; i++)
        if (charLookUp[i].ch == lookahead) {
            _spelling[0] = lookahead;
            _spelling[1] = '\0';
            lookahead = nextChar();
            return(charLookUp[i].value);
        }

    // Tokens involving two or more characters however require
    // specific attention. Here, we differentiate between '.' and '..'.
    if (lookahead == '.') {
        _spelling[0] = '.';
        lookahead = nextChar();
        if (lookahead == '.') {
            _spelling[1] = '.';
            _spelling[2] = '\0';
            lookahead = nextChar();
            return(RANGE);
        } else {
            _spelling[1] = '\0';
            return(PERIOD);
        }
    }

    // % markers are checked against the markLookUp table. An unknown mark
    // is reported and then falls through to the UNKNOWN case below.
    if (lookahead == '%') {
        lookahead = nextChar();
        getSpelling();
        for (int i = 0; i < markLookUpNumbers; i++)
            if (strcmp(markLookUp[i].m, _spelling) == 0)
                return(markLookUp[i].value);
        error(ERROR, "does not recognise mark `%s'\n", _spelling);
    }

    // If tokens cannot be recognised by this stage, it is
    // erroneous and the UNKNOWN symbol is returned instead.
    // NOTE(review): lookahead is not advanced here, so the same character
    // is seen again by the next call -- confirm that the caller's error
    // recovery does not rely on scan() making progress.
    _spelling[0] = lookahead;
    _spelling[1] = '\0';
    return UNKNOWN;
}
// Collects the spelling of an identifier-like word into _spelling[].
//
// The character currently in lookahead is always taken as the first
// character of the word; further characters are taken for as long as they
// are letters, digits or underscores. At most MAXSPELLING characters are
// stored, any excess is read and discarded. On return lookahead holds the
// first character that is not part of the word.
// NOTE(review): the terminator may be written at index MAXSPELLING, which
// presumes _spelling[] has room for MAXSPELLING+1 elements -- confirm
// against the declaration.
void Lexer::getSpelling()
{
    int length = 0;
    for (;;) {
        // Store the current character unless the buffer is already full.
        if (length < MAXSPELLING) {
            _spelling[length] = lookahead;
            ++length;
        }
        lookahead = nextChar();

        const bool isLower = (lookahead >= 'a' && lookahead <= 'z');
        const bool isUpper = (lookahead >= 'A' && lookahead <= 'Z');
        const bool isDigit = (lookahead >= '0' && lookahead <= '9');
        if (!(isLower || isUpper || isDigit || lookahead == '_'))
            break;
    }
    _spelling[length] = 0;
}
// Advances lookahead past any run of whitespace characters.
//
// Besides blank, tab and newline, the carriage return, form feed and
// vertical tab characters are skipped too, so that files with CR-LF line
// endings do not produce spurious unrecognised tokens.
void Lexer::skipSpaces()
{
    while (lookahead == ' '  || lookahead == '\t' ||
           lookahead == '\n' || lookahead == '\r' ||
           lookahead == '\f' || lookahead == '\v')
        lookahead = nextChar();
}
// Constructs a lexer reading from the file named fn.
//
// The file name is handed to the Source base class. If the source reports
// CANNOTOPEN, an ABORT level error is raised. Otherwise the first character
// is read into lookahead and nextSymbol() is called once.
Lexer::Lexer(const char *fn) : Source(fn)
{
    const bool unreadable = (status() == CANNOTOPEN);
    if (unreadable) {
        error(ABORT, "cannot open '%s'\n", fileName());
    }

    // Prime the character lookahead before the first symbol is requested.
    lookahead = nextChar();
    nextSymbol();
}
char *Lexer::scanCode()
{
char buf[20480];
char *p;
int i = 0;
tokenPosition = currentLine();
// A code block is terminated with an double percentage (%%).
for (;;) {
if (lookahead == '%') {
lookahead = nextChar();
if (lookahead == '%')
break;
buf[i++] = '%';
}
buf[i++] = lookahead;
lookahead = nextChar();
}
lookahead = ' ';
buf[i++] = '\0';
// The code block is allocated an appropriate memory block. It is
// prefixed with a '#line' directive to enable any compilation errors
// to be reported with respect to the original specification file
// rather than the generated file produce by LexGen.
p = allocate(i+20+strlen(fileName()));
sprintf(p, "\n#line %d \"%s\"\n%s", tokenPosition, fileName(), buf);
return p;
}
// syntax highlighted by Code2HTML, v. 0.9.1