#include "global.h"
#include "source.h"
#include "lex.h"

// For the purpose of error reporting, it is convenient to have
// representations of the various tokens.
//
// NOTE(review): this table lists "%sets" but not "%prelude", "%postlude"
// or "%filter", all of which markLookUp[] in scan() recognises -- confirm
// that the entries still line up with the symbols enumeration in lex.h.

char *Lexer::symbolSpelling[] = {
    "", "", "{", "}", "[", "]", "(", ")", "=", "|", ",", "..", ".",
    "+", "*", "%sets", "%ignore", "%tokens", "'", "\"", "", "??"
};

// Reads the next token from the input and returns its symbol value.
//
// Leading blanks, tabs and newlines are skipped first and the position of
// the token is recorded in tokenPosition.  For character constants the
// character value is left in _spelling[0].  Comments are skipped by
// calling scan() again once the comment has been consumed.

Lexer::symbols Lexer::scan()
{
    // The member function scan() relies on two lookup tables for character
    // to token mappings. The charLookUp[] table maps single characters to
    // token values.

    static struct charLookUpType {
        int ch;
        symbols value;
    } charLookUp[] = {
        { '=', EQUAL },
        { ',', COMMA },
        { '|', ALTERNATE },
        { '{', LEFTBRACE },
        { '}', RIGHTBRACE },
        { '[', LEFTBRACKET },
        { ']', RIGHTBRACKET },
        { '(', LEFTPARENT },
        { ')', RIGHTPARENT },
        { '*', STAR },
        { '+', PLUS },
        { '\'', SQUOTE },
        { '"', DQUOTE },
        { EOF, EOFTOKEN }
    };
    static int charLookUpNumbers =
        sizeof(charLookUp)/sizeof(struct charLookUpType);

    // The markLookUp[] table maps % strings to token values.

    static struct markLookUpType {
        char *m;
        symbols value;
    } markLookUp[] = {
        { "ignore", IGNOREMARK },
        { "prelude", PRELUDEMARK },
        { "postlude", POSTLUDEMARK },
        { "filter", FILTERMARK },
        { "tokens", TOKENMARK }
    };
    static int markLookUpNumbers =
        sizeof(markLookUp)/sizeof(struct markLookUpType);

    // The processing for scan() first involves ignoring whitespaces.
    // Subsequently, the token position is noted.

    skipSpaces();
    tokenPosition = charPosition();

    // If the starting character falls into the alphabetic (or underscore)
    // category for identifiers, the function getSpelling() is
    // invoked to read it.

    if ((lookahead >= 'a' && lookahead <= 'z') ||
        (lookahead >= 'A' && lookahead <= 'Z') ||
        lookahead == '_') {
        getSpelling();
        return IDENTIFIER;
    }

    // Character constants are prefixed with the '$' literal or '#' ASCII
    // value markers. In the former case, the next character is merely
    // read, whereas in the latter case, a string to integer conversion is
    // necessary.

    if (lookahead == '$') {
        _spelling[0] = nextChar();
        _spelling[1] = '\0';
        lookahead = nextChar();
        return CHARCONST;
    }

    if (lookahead == '#') {
        int value = 0;
        lookahead = nextChar();
        while (lookahead >= '0' && lookahead <= '9') {
            value = value*10 + lookahead - '0';
            lookahead = nextChar();
        }
        // The decimal value is stored as a single character.
        _spelling[0] = value;
        _spelling[1] = '\0';
        return CHARCONST;
    }

    // Comment conventions follow that of C++ and C.
    // Input between double slashes (//) till the end of the line,
    // and those between /* and */ are ignored.
    //
    // NOTE(review): a '/' that starts neither kind of comment is consumed
    // here and processing simply continues with the character after it.

    if (lookahead == '/') {
        lookahead = nextChar();
        if (lookahead == '/') {
            // Stop at end of input as well as at end of line, so that a
            // comment on the last, unterminated line cannot loop forever.
            do {
                lookahead = nextChar();
            } while (lookahead != '\n' && lookahead != EOF);
            if (lookahead != EOF)
                lookahead = nextChar();
            return scan();
        } else if (lookahead == '*') {
            // c1/c2 form a two character window over the comment text.
            // They are ints so that EOF is representable.  The comment
            // ends only when the window holds exactly "*/"; reaching end
            // of input also ends the search so that an unterminated
            // comment cannot loop forever.
            int c1 = nextChar();
            int c2 = (c1 == EOF) ? c1 : nextChar();
            while (c2 != EOF && !(c1 == '*' && c2 == '/')) {
                c1 = c2;
                c2 = nextChar();
            }
            if (c2 == EOF)
                lookahead = c2;
            else
                lookahead = nextChar();
            return scan();
        }
    }

    // Single character tokens are recognised by scanning the
    // charLookUp[] table.

    // NOTE(review): SOURCE TEXT IS DAMAGED FROM HERE.  The file content
    // jumps from the loop header below straight into what looks like the
    // tail of a loop condition, so the remainder of scan() and the opening
    // of getSpelling() (which scan() calls above) are missing.  The
    // surviving tokens are reproduced unchanged; this region will not
    // compile until the lost text is restored from the project history.

    for (int i=0; i= 'a' && lookahead <= 'z') ||
             (lookahead >= 'A' && lookahead <= 'Z') ||
             (lookahead >= '0' && lookahead <= '9') ||
             lookahead == '_');
    _spelling[i] = 0;
}

// NOTE(review): END OF DAMAGED REGION.

// Advances lookahead past any run of blanks, newlines and tabs.

void Lexer::skipSpaces()
{
    while (lookahead == ' ' || lookahead == '\n' || lookahead == '\t')
        lookahead = nextChar();
}

// Opens the specification file fn through the Source base class, reports
// an error if it cannot be opened, then primes lookahead with the first
// character and reads the first symbol.

Lexer::Lexer(const char *fn) : Source(fn)
{
    if (status() == CANNOTOPEN)
        error(ABORT, "cannot open '%s'\n", fileName());
    lookahead = nextChar();
    nextSymbol();
}

// Collects the raw text of a code block and returns it in storage obtained
// from allocate(), prefixed with a '#line' directive.
//
// Reading starts at the current lookahead and stops at a double percentage
// (%%), which is consumed but not stored.  A '%' that is not followed by
// another '%' is copied through unchanged.  On return lookahead is a blank,
// and tokenPosition holds the value of currentLine() taken before the
// block was read.

char *Lexer::scanCode()
{
    enum {
        CODE_BUFFER_SIZE = 20480,
        // Room for the text sprintf() adds around the file name and code:
        // 11 fixed characters ("\n#line ", " \"" and "\"\n") plus at most
        // 11 characters for a 32-bit %d.  The terminating NUL is already
        // counted in the code length.
        LINE_DIRECTIVE_OVERHEAD = 32
    };

    char buf[CODE_BUFFER_SIZE];
    char *p;
    int length = 0;

    tokenPosition = currentLine();

    // A code block is terminated with a double percentage (%%).
    // Each pass through the loop stores at most one character.

    for (;;) {
        // presumably error(ABORT, ...) does not return (the constructor
        // relies on it the same way); the breaks below keep the loop
        // finite and the buffer intact even if it does.
        if (lookahead == EOF) {
            error(ABORT, "unterminated code block in '%s'\n", fileName());
            break;
        }
        if (length >= CODE_BUFFER_SIZE - 1) {     // keep room for the NUL
            error(ABORT, "code block too long in '%s'\n", fileName());
            break;
        }
        if (lookahead == '%') {
            lookahead = nextChar();
            if (lookahead == '%')
                break;
            // A lone '%' is ordinary text; the character after it is
            // handled by the next pass.
            buf[length++] = '%';
            continue;
        }
        buf[length++] = lookahead;
        lookahead = nextChar();
    }
    lookahead = ' ';
    buf[length++] = '\0';

    // The code block is allocated an appropriate memory block. It is
    // prefixed with a '#line' directive to enable any compilation errors
    // to be reported with respect to the original specification file
    // rather than the generated file produced by LexGen.

    p = allocate(length + LINE_DIRECTIVE_OVERHEAD + strlen(fileName()));
    sprintf(p, "\n#line %d \"%s\"\n%s", tokenPosition, fileName(), buf);
    return p;
}