/*
Copyright (c) 2003, Steve Dekorte
All rights reserved. See _BSDLicense.txt.
Aug 2004 - removed {} from op chars
- changed identifier to stop after 1 colon
*/
#include "IoLexer.h"
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stddef.h>
// Uncomment to trace position-stack pushes, pops and rewinds (see the
// LEXER_DEBUG sections in IoLexer_pushPos/popPos/popPosBack).
//#define LEXER_DEBUG
// Uncomment to print every token as it is appended to the token stream.
//#define LEXER_DEBUG_TOKENS
// Prefix placed on the small character-level helpers in this file.
#define TEST_INLINE inline
// Returns the token on top of the token stream (whatever List_top yields
// for self->tokenStream).
static IoToken *IoLexer_currentToken(IoLexer *self)
{
    IoToken *newest = List_top(self->tokenStream);

    return newest;
}
IoLexer *IoLexer_new(void)
{
IoLexer *self = (IoLexer *)calloc(1, sizeof(IoLexer));
self->s = (char *)calloc(1, 1);
self->posStack = Stack_new();
self->tokenStack = Stack_new();
self->tokenStream = List_new();
self->charLineIndex = List_new();
return self;
}
// Releases the lexer: frees the tokens (via IoLexer_clear), the source
// buffer, both stacks, both lists, the cached error description and
// finally the lexer itself.
void IoLexer_free(IoLexer *self)
{
    IoLexer_clear(self);

    free(self->s);

    Stack_free(self->posStack);
    Stack_free(self->tokenStack);

    List_free(self->tokenStream);
    List_free(self->charLineIndex);

    // free(NULL) is a no-op, so no guard is needed when no description
    // was ever built.
    free(self->errorDescription);

    free(self);
}
// Returns a human readable description of the current error token, in the
// form: "<error>" on line <n> character <n>
//
// The text lives in a buffer owned by the lexer (allocated on first use and
// released by IoLexer_free). When there is no error token the buffer is
// returned as it was left by the previous call. Returns NULL only if the
// buffer could not be allocated.
char *IoLexer_errorDescription(IoLexer *self)
{
    enum { DESCRIPTION_SIZE = 1024 };
    IoToken *et = IoLexer_errorToken(self);

    if (!self->errorDescription)
    {
        self->errorDescription = calloc(1, DESCRIPTION_SIZE);

        if (!self->errorDescription)
        {
            return NULL;
        }
    }

    if (et)
    {
        // Bounded write: the error text has no length limit of its own,
        // so an unbounded sprintf could overrun the buffer. snprintf
        // truncates and always NUL-terminates.
        snprintf(self->errorDescription,
                 DESCRIPTION_SIZE,
                 "\"%s\" on line %i character %i",
                 et->error,
                 IoToken_lineNumber(et),
                 IoToken_charNumber(et));
    }

    return self->errorDescription;
}
// Rebuilds charLineIndex for the current source text. The list holds, in
// order: the start of the text, a pointer to every '\n' character, and the
// terminating NUL. The line hint is reset to 0.
void IoLexer_buildLineIndex(IoLexer *self)
{
    List *lineIndex = self->charLineIndex;
    char *cursor = self->s;

    List_removeAll(lineIndex);
    List_append_(lineIndex, cursor);

    for (; *cursor; cursor++)
    {
        if ('\n' == *cursor)
        {
            List_append_(lineIndex, cursor);
        }
    }

    // cursor now points at the terminator
    List_append_(lineIndex, cursor);
    self->lineHint = 0;
}
// next/prev character ------------------------
// UTF8_SEQLEN(c): number of bytes in the sequence introduced by byte c.
//   0x00-0x7f -> 1, 0x80-0xdf -> 2, 0xe0-0xef -> 3, 0xf0-0xf7 -> 4,
//   0xf8-0xfb -> 5, 0xfc-0xfd -> 6, 0xfe-0xff -> 1
// The argument is evaluated more than once; pass a plain value.
#define UTF8_SEQLEN(c) ( \
    (c) >= 0xfe ? 1 : \
    (c) >= 0xfc ? 6 : \
    (c) >= 0xf8 ? 5 : \
    (c) >= 0xf0 ? 4 : \
    (c) >= 0xe0 ? 3 : \
    (c) >= 0x80 ? 2 : 1 \
)
// Value returned by the decoder for a malformed sequence.
#define INVALID_CHAR 0xfffe
// Decodes the single UTF-8 sequence that starts at s and returns its code
// point, or INVALID_CHAR when the sequence is malformed.
//
// Sequences of 1 to 6 bytes are accepted (lead bytes up to 0xfd).
// Every continuation byte is tested with (b ^ 0x80) < 0x40, which holds
// exactly for bytes 0x80..0xbf. The tests are chained with &&, so decoding
// stops at the first byte that is not a continuation byte (a NUL
// terminator included) and later bytes are not read.
// The final condition of each multi-byte branch rejects overlong encodings
// (values that would have fit in a shorter sequence).
static uchar_t _IoLexer_DecodeUTF8(const unsigned char *s)
{
// 1 byte: plain ASCII
if (*s < 0x80)
return *s;
// 0x80..0xbf are continuation bytes, 0xc0/0xc1 would be overlong 2-byte forms
else if (*s < 0xc2)
return INVALID_CHAR;
// 2 bytes: 5 + 6 payload bits
else if (*s < 0xe0)
{
if (!((s[1] ^ 0x80) < 0x40))
return INVALID_CHAR;
return ((uchar_t)(s[0] & 0x1f) << 6) | (uchar_t)(s[1] ^ 0x80);
}
// 3 bytes: 4 + 6 + 6 payload bits
else if (*s < 0xf0)
{
if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[0] >= 0xe1 || s[1] >= 0xa0)))
return INVALID_CHAR;
return ((uchar_t)(s[0] & 0x0f) << 12) | ((uchar_t)(s[1] ^ 0x80) << 6) | (uchar_t)(s[2] ^ 0x80);
}
// 4 bytes: 3 + 6 + 6 + 6 payload bits
else if (*s < 0xf8)
{
if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[3] ^ 0x80) < 0x40 && (s[0] >= 0xf1 || s[1] >= 0x90)))
return INVALID_CHAR;
return ((uchar_t)(s[0] & 0x07) << 18) | ((uchar_t)(s[1] ^ 0x80) << 12) | ((uchar_t)(s[2] ^ 0x80) << 6) | (uchar_t)(s[3] ^ 0x80);
}
// 5 bytes: 2 + 4 * 6 payload bits
else if (*s < 0xfc)
{
if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 && (s[0] >= 0xf9 || s[1] >= 0x88)))
return INVALID_CHAR;
return ((uchar_t)(s[0] & 0x03) << 24) | ((uchar_t)(s[1] ^ 0x80) << 18) | ((uchar_t)(s[2] ^ 0x80) << 12) | ((uchar_t)(s[3] ^ 0x80) << 6) | (uchar_t)(s[4] ^ 0x80);
}
// 6 bytes: 1 + 5 * 6 payload bits
else if (*s < 0xfe)
{
if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 && (s[5] ^ 0x80) < 0x40 && (s[0] >= 0xfd || s[1] >= 0x84)))
return INVALID_CHAR;
return ((uchar_t)(s[0] & 0x01) << 30) | ((uchar_t)(s[1] ^ 0x80) << 24) | ((uchar_t)(s[2] ^ 0x80) << 18) | ((uchar_t)(s[3] ^ 0x80) << 12) | ((uchar_t)(s[4] ^ 0x80) << 6) | (uchar_t)(s[5] ^ 0x80);
}
// 0xfe and 0xff never start a sequence
else
return INVALID_CHAR;
}
// Decodes the character at the cursor and moves the cursor past it.
// Returns the code point, or 0 WITHOUT moving the cursor when the cursor is
// on the terminating NUL, the sequence is cut short by a NUL, or the
// sequence does not decode.
TEST_INLINE uchar_t IoLexer_nextChar(IoLexer *self)
{
    unsigned char lead = (unsigned char) * (self->current);
    uchar_t decoded;
    int width;
    int offset;

    if (lead == 0)
    {
        return 0;
    }

    if (lead < 0x80)
    {
        // single byte fast path
        self->current++;
        return lead;
    }

    width = UTF8_SEQLEN(lead);

    for (offset = 0; offset < width; offset++)
    {
        if (self->current[offset] == 0)
        {
            // XXX: invalid or incomplete sequence
            return 0;
        }
    }

    decoded = _IoLexer_DecodeUTF8((unsigned char *)self->current);

    if (decoded != INVALID_CHAR)
    {
        self->current += width;
        return decoded;
    }

    return 0;
}
// Moves the cursor back by one character and returns that character (0 if
// it does not decode). The step size is found by scanning backwards, at
// most 6 bytes and never past the second byte of the buffer, until a byte
// that is not in the range 0x80..0xc1 is seen.
TEST_INLINE uchar_t IoLexer_prevChar(IoLexer *self)
{
    uchar_t decoded;
    int back = 1;

    while (back <= 6 && self->current - back > self->s)
    {
        unsigned char b = *(unsigned char *)(self->current - back);

        // anything outside 0x80..0xc1 starts a character
        if (!(b >= 0x80 && b < 0xc2))
        {
            break;
        }

        back++;
    }

    self->current -= back;
    decoded = _IoLexer_DecodeUTF8((unsigned char *)self->current);

    return (decoded == INVALID_CHAR) ? 0 : decoded;
}
// Returns the cursor: a pointer into the source buffer at the next
// character to be read.
TEST_INLINE char *IoLexer_current(IoLexer *self)
{
    char *cursor = self->current;

    return cursor;
}
// Returns 1 when the cursor is on the terminating NUL byte, otherwise 0.
TEST_INLINE int IoLexer_onNULL(IoLexer *self)
{
    return self->current[0] == '\0';
}
// ------------------------------------------
// Reference implementation of the line lookup: 1 plus the number of '\n'
// characters between the start of the buffer and the cursor.
size_t IoLexer_currentLineNumberOld(IoLexer *self)
{
    size_t newlines = 0;
    const char *p;

    for (p = self->s; p < self->current; p++)
    {
        if ('\n' == *p)
        {
            newlines++;
        }
    }

    return newlines + 1;
}
// Returns the line number of the cursor using charLineIndex (the pointer
// list filled in by IoLexer_buildLineIndex) instead of rescanning the text.
// The search starts at self->lineHint, which is the result of the previous
// call, and the new result is stored back into self->lineHint.
TEST_INLINE size_t IoLexer_currentLineNumber(IoLexer *self)
{
// this should be even faster than a binary search
// since almost all results are very close to the last
List *index = self->charLineIndex;
size_t line = self->lineHint;
size_t numLines = List_size(index);
void *current = (void *)self->current;
// Cursor lies before the hinted index entry: search towards the start.
if (current < List_at_(index, line))
{
// walk down lines until char is bigger than one
while (line > 0 && !(current > List_at_(index, line)))
{
line --;
}
// step forward one entry from where the downward walk stopped
line ++;
}
else
{
// walk up lines until char is less than or equal to one
while (line < numLines && !(current <= List_at_(index, line)))
{
line ++;
}
}
self->lineHint = line;
// Disabled self-check against the linear implementation.
/*
{
size_t realLine = IoLexer_currentLineNumberOld(self);
if (line != realLine)
{
printf("mismatch on currentLine %i != %i\n", (int)line, (int)realLine);
}
}
*/
return line;
}
// Resets the lexer to the start of its source text: frees every token,
// empties the token stream and both stacks, rewinds the cursor and clears
// the result index, the furthest-position marker and the error token.
void IoLexer_clear(IoLexer *self)
{
    // tokens are owned by the stream, so free them before emptying it
    LIST_FOREACH(self->tokenStream, i, t, IoToken_free((IoToken *)t) );
    List_removeAll(self->tokenStream);

    Stack_clear(self->posStack);
    Stack_clear(self->tokenStack);

    self->errorToken = NULL;
    self->maxChar = 0;
    self->resultIndex = 0;
    self->current = self->s;
}
// Returns the token the last error was attached to, or NULL when no error
// token has been set since the last IoLexer_clear.
IoToken *IoLexer_errorToken(IoLexer *self)
{
    IoToken *failed = self->errorToken;

    return failed;
}
// lexing -------------------------------------
// Replaces the lexer's source text with a private copy of `string`
// (which must be a NUL-terminated string), rewinds the cursor to its start
// and rebuilds the line index.
//
// The buffer is resized through a temporary pointer: if realloc fails the
// old buffer is still owned by the lexer and nothing is written through a
// NULL pointer. Running out of memory is reported and treated as fatal, in
// the same way as the other fatal errors in this file.
void IoLexer_string_(IoLexer *self, const char *string)
{
    size_t size = strlen(string) + 1; // includes the terminator
    char *copy = (char *)realloc(self->s, size);

    if (!copy)
    {
        printf("IoLexer fatal error: out of memory\n");
        exit(1);
    }

    memcpy(copy, string, size);
    self->s = copy;
    self->current = self->s;
    IoLexer_buildLineIndex(self);
}
// Writes to stdout up to `max` bytes of the source text, starting at the
// furthest offset recorded in maxChar and stopping early at the NUL.
void IoLexer_printLast_(IoLexer *self, int max)
{
    const char *p = self->s + self->maxChar;
    int remaining = max;

    while (remaining > 0 && *p)
    {
        putchar(*p);
        p++;
        remaining--;
    }
}
// --- token and character position stacks ---
// Returns the most recently pushed cursor position (top of posStack)
// without popping it.
char *IoLexer_lastPos(IoLexer *self)
{
    char *saved = Stack_top(self->posStack);

    return saved;
}
// Saves the lexer state so it can later be dropped (IoLexer_popPos) or
// restored (IoLexer_popPosBack). Two values are pushed: the index of the
// last token in the stream (-1 when the stream is empty) and the cursor.
// maxChar is raised to the cursor offset if the cursor is further along.
TEST_INLINE void IoLexer_pushPos(IoLexer *self)
{
    ptrdiff_t offset = self->current - self->s;
    ptrdiff_t lastTokenIndex = (ptrdiff_t)(List_size(self->tokenStream) - 1);

    if ((ptrdiff_t)self->maxChar < offset)
    {
        self->maxChar = offset;
    }

    // the index travels through the stack disguised as a pointer
    Stack_push_(self->tokenStack, (void *)lastTokenIndex);
    Stack_push_(self->posStack, self->current);

#ifdef LEXER_DEBUG
    printf("push: ");
    IoLexer_print(self);
#endif
}
// Discards the most recently saved state (one entry from each stack) and
// keeps the cursor and token stream as they are now.
TEST_INLINE void IoLexer_popPos(IoLexer *self)
{
    Stack_pop(self->tokenStack);
    Stack_pop(self->posStack);

#ifdef LEXER_DEBUG
    printf("pop: ");
    IoLexer_print(self);
#endif
}
// Backtracks to the most recently saved state: pops one entry from each
// stack, shrinks the token stream back to the saved size and moves the
// cursor back to the saved position.
TEST_INLINE void IoLexer_popPosBack(IoLexer *self)
{
// i: index of the last token at the time of the matching IoLexer_pushPos
// (-1 if the stream was empty). topIndex: the same value for the enclosing
// saved state, read after i has been popped.
ptrdiff_t i = (ptrdiff_t)Stack_pop(self->tokenStack);
ptrdiff_t topIndex = (ptrdiff_t)Stack_top(self->tokenStack);
// Note: when i is -1 the stream is left untouched.
if (i > -1)
{
// Drop every token added since the push. The tokens are not freed here.
List_setSize_(self->tokenStream, i + 1);
if (i != topIndex) // ok to free token
{
// Unlink whatever used to follow the token that is now last.
IoToken *parent = IoLexer_currentToken(self);
if (parent)
{
IoToken_nextToken_(parent, NULL);
}
}
}
self->current = Stack_pop(self->posStack);
#ifdef LEXER_DEBUG
printf("back: "); IoLexer_print(self);
#endif
}
// ------------------------------------------
// Tokenizes the whole source text.
// Returns 0 when the message chain consumed everything up to the NUL, and
// -1 otherwise. On failure an error token is guaranteed to be set: if no
// reader recorded one, the last token (or, for an empty stream, a new
// NO_TOKEN token taken from the cursor) gets a generic syntax error.
int IoLexer_lex(IoLexer *self)
{
    IoLexer_clear(self);
    IoLexer_pushPos(self);
    IoLexer_messageChain(self);

    if (*(self->current) == 0)
    {
        return 0;
    }

    // Unconsumed input is left over: report it unless a more specific
    // error was already attached.
    if (self->errorToken)
    {
        return -1;
    }

    if (List_size(self->tokenStream))
    {
        self->errorToken = IoLexer_currentToken(self);
    }
    else
    {
        self->errorToken = IoLexer_addTokenString_length_type_(self, self->current, 30, NO_TOKEN);
    }

    IoToken_error_(self->errorToken, "Syntax error near this location");

    return -1;
}
// getting results --------------------------------
// Returns the token at the read position (resultIndex) of the token stream
// without advancing, i.e. whatever List_at_ yields for that index.
IoToken *IoLexer_top(IoLexer *self)
{
    IoToken *next = List_at_(self->tokenStream, self->resultIndex);

    return next;
}
// Returns the type of the token at the read position, or 0 when there is
// no such token.
IoTokenType IoLexer_topType(IoLexer *self)
{
    IoToken *next = IoLexer_top(self);

    if (next)
    {
        return next->type;
    }

    return 0;
}
// Returns the token at the read position and advances the read position by
// one. The position advances even when no token was available.
IoToken *IoLexer_pop(IoLexer *self)
{
    IoToken *taken = IoLexer_top(self);

    self->resultIndex += 1;

    return taken;
}
// stack management --------------------------------
// Debug helper: prints the first token of the stream with IoToken_print
// (if there is one), followed by a newline.
void IoLexer_print(IoLexer *self)
{
    IoToken *head = List_first(self->tokenStream);

    if (head != NULL)
    {
        IoToken_print(head);
    }

    printf("\n");
}
// Debug helper: prints every token of the stream on one line as
// 'name' typeName, separated by ", " and terminated by a newline.
void IoLexer_printTokens(IoLexer *self)
{
    int i;

    for (i = 0; i < List_size(self->tokenStream); i ++)
    {
        IoToken *t = List_at_(self->tokenStream, i);

        // separator goes between tokens, never after the last one
        if (i > 0)
        {
            printf(", ");
        }

        printf("'%s'", t->name);
        printf(" %s ", IoToken_typeName(t));
    }

    printf("\n");
}
// grabbing ---------------------------------------------
// Returns the number of bytes between the most recently saved position and
// the cursor, i.e. how much has been consumed since the last pushPos.
int IoLexer_grabLength(IoLexer *self)
{
    char *from = IoLexer_lastPos(self);
    char *to = IoLexer_current(self);

    return to - from;
}
// Turns the text consumed since the last saved position into a token of
// the given type and appends it to the stream. An empty span is treated as
// an internal error: a message is printed and the process exits.
void IoLexer_grabTokenType_(IoLexer *self, IoTokenType type)
{
    char *start = IoLexer_lastPos(self);
    char *end = IoLexer_current(self);
    size_t length = (end - start);

    if (length == 0)
    {
        printf("IoLexer fatal error: empty token\n");
        exit(1);
    }

    IoLexer_addTokenString_length_type_(self, start, length, type);
}
// Creates a token named by the first `len` bytes of `s1`, gives it `type`,
// links it after the current last token and appends it to the stream.
// The token's line number and character number are taken from the cursor
// position (not from s1). Returns the new token.
IoToken *IoLexer_addTokenString_length_type_(IoLexer *self, const char *s1, size_t len, IoTokenType type)
{
    IoToken *previous = IoLexer_currentToken(self);
    IoToken *token = IoToken_new();

    token->lineNumber = IoLexer_currentLineNumber(self);
    token->charNumber = (int)(self->current - self->s);

    if (token->charNumber < 0)
    {
        printf("bad t->charNumber = %i\n", token->charNumber);
    }

    IoToken_name_length_(token, s1, len);
    IoToken_type_(token, type);

    // chain the tokens together in stream order
    if (previous)
    {
        IoToken_nextToken_(previous, token);
    }

    List_push_(self->tokenStream, token);

#ifdef LEXER_DEBUG_TOKENS
    printf("token '%s' %s\n", token->name, IoToken_typeName(token));
#endif

    return token;
}
// reading ------------------------------------
// Reads a sequence of messages. Before each message any run of
// terminators, separators and comments is consumed; reading stops at the
// first position where no message can be read.
void IoLexer_messageChain(IoLexer *self)
{
    for (;;)
    {
        while ( IoLexer_readTerminator(self) ||
                IoLexer_readSeparator(self) ||
                IoLexer_readComment(self))
        {
            // keep skipping
        }

        if (!IoLexer_readMessage(self))
        {
            break;
        }
    }
}
// message -------------------------------
// Fails the message being read: backtracks to the saved state, then marks
// the token that is current after backtracking as the error token and
// attaches the text `name` to it.
static void IoLexer_readMessage_error(IoLexer *self, const char *name)
{
    IoToken *culprit;

    IoLexer_popPosBack(self);

    culprit = IoLexer_currentToken(self);
    self->errorToken = culprit;
    IoToken_error_(culprit, name);
}
// Tries each character of `chars` in order and reads the first one that
// matches at the cursor as a one-character token of `type`.
// Returns 1 if a token was read, 0 if none of the characters matched.
int IoLexer_readTokenChars_type_(IoLexer *self, const char *chars, IoTokenType type)
{
    const char *candidate;

    for (candidate = chars; *candidate; candidate++)
    {
        if (IoLexer_readTokenChar_type_(self, *candidate, type))
        {
            return 1;
        }
    }

    return 0;
}
// Maps an opening group character to the name of the implicit message it
// stands for: '(' -> "", '[' -> "squareBrackets", '{' -> "curlyBrackets".
// Any other character is an internal error: a message is printed and the
// process exits.
const char *IoLexer_nameForGroupChar_(IoLexer *self, char groupChar)
{
    if (groupChar == '(')
    {
        return "";
    }

    if (groupChar == '[')
    {
        return "squareBrackets";
    }

    if (groupChar == '{')
    {
        return "curlyBrackets";
    }

    printf("IoLexer: fatal error - invalid group char %c\n", groupChar);
    exit(1);
}
// Characters IoLexer_readSpecialChar accepts inside identifiers.
// File-wide mutable state: IoLexer_readMessage temporarily switches it to
// "._" while reading the arguments of a '[' group.
static char *specialChars = ":._";
// Reads one message: an optional symbol followed by an optional argument
// group delimited by ( ), [ ] or { }.
// Returns 1 if a symbol and/or a complete group was read, 0 otherwise.
// On the two error paths (empty argument, missing closing character) an
// error token is set through IoLexer_readMessage_error before returning 0.
int IoLexer_readMessage(IoLexer *self)
{
char foundSymbol;
IoLexer_pushPos(self);
IoLexer_readPadding(self);
foundSymbol = IoLexer_readSymbol(self);
{
char groupChar;
while (IoLexer_readSeparator(self) || IoLexer_readComment(self))
{}
groupChar = *IoLexer_current(self);
// A group with no symbol in front of it gets an implicit identifier token
// named by IoLexer_nameForGroupChar_. For '[' and '{' this is added even
// when a symbol was found; for '(' only when there was no symbol.
if (groupChar && (strchr("[{", groupChar) || (!foundSymbol && groupChar == '(')))
{
char *groupName = (char *)IoLexer_nameForGroupChar_(self, groupChar);
IoLexer_addTokenString_length_type_(self, groupName, strlen(groupName), IDENTIFIER_TOKEN);
}
if (IoLexer_readTokenChars_type_(self, "([{", OPENPAREN_TOKEN))
{
IoLexer_readPadding(self);
// One iteration per argument; arguments are separated by commas.
do {
// Type of the last token read: the open paren on the first pass,
// the comma on later passes.
IoTokenType type = IoLexer_currentToken(self)->type;
IoLexer_readPadding(self);
// Empty argument: (... ,)
if (COMMA_TOKEN == type)
{
char c = *IoLexer_current(self);
// NOTE(review): strchr also matches the terminator, so a NUL right after
// a comma takes this branch too.
if (',' == c || strchr(")]}", c))
{
IoLexer_readMessage_error(self, "missing argument in argument list");
return 0;
}
}
// Inside [ ] the ':' character is removed from the identifier characters
// for the duration of the argument, then restored.
if (groupChar == '[') specialChars = "._";
IoLexer_messageChain(self);
if (groupChar == '[') specialChars = ":._";
IoLexer_readPadding(self);
} while (IoLexer_readTokenChar_type_(self, ',', COMMA_TOKEN));
// Any of the three closing characters is accepted here, whichever
// character opened the group.
if (!IoLexer_readTokenChars_type_(self, ")]}", CLOSEPAREN_TOKEN))
{
/*
char c = *IoLexer_current(self);
if (strchr("([{", c))
{
IoLexer_readMessage_error(self, "expected a message but instead found a open group character");
}
else
{
IoLexer_readMessage_error(self, "missing closing group character for argument list");
}
*/
if (groupChar == '(')
{
IoLexer_readMessage_error(self, "unmatched ()s");
}
else if (groupChar == '[')
{
IoLexer_readMessage_error(self, "unmatched []s");
}
else if (groupChar == '{')
{
IoLexer_readMessage_error(self, "unmatched {}s");
}
//printf("Token %p error: %s - %s\n", t, t->error, IoToken_error(t));
return 0;
}
IoLexer_popPos(self);
return 1;
}
// No group: a bare symbol is still a complete message.
if (foundSymbol)
{
IoLexer_popPos(self);
return 1;
}
}
// Nothing was read: restore the state saved on entry.
IoLexer_popPosBack(self);
return 0;
}
// Skips any mixture of whitespace runs and comments.
// Returns 1 if anything was skipped, 0 otherwise.
int IoLexer_readPadding(IoLexer *self)
{
    int skipped = 0;

    for (;;)
    {
        if (!(IoLexer_readWhitespace(self) || IoLexer_readComment(self)))
        {
            break;
        }

        skipped = 1;
    }

    return skipped;
}
// symbols ------------------------------------------
// Reads one symbol, trying in this order: number, operator, identifier,
// quoted string. Returns 1 if one of them was read, 0 otherwise.
int IoLexer_readSymbol(IoLexer *self)
{
    if (IoLexer_readNumber(self))     return 1;
    if (IoLexer_readOperator(self))   return 1;
    if (IoLexer_readIdentifier(self)) return 1;
    if (IoLexer_readQuote(self))      return 1;

    return 0;
}
// Reads an identifier: a run of letters, digits and special characters.
// If the run ends in ':' and is directly followed by '=', the ':' is given
// back so that it is not part of the identifier.
// Returns 1 and adds an IDENTIFIER_TOKEN on success, 0 otherwise.
int IoLexer_readIdentifier(IoLexer *self)
{
    char *end;

    IoLexer_pushPos(self);

    while ( IoLexer_readLetter(self) ||
            IoLexer_readDigit(self) ||
            IoLexer_readSpecialChar(self))
    {
        // consume
    }

    if (!IoLexer_grabLength(self))
    {
        IoLexer_popPosBack(self);
        return 0;
    }

    // avoid grabbing : on last character if followed by =
    end = IoLexer_current(self);

    if (end[-1] == ':' && end[0] == '=')
    {
        IoLexer_prevChar(self);
    }

    IoLexer_grabTokenType_(self, IDENTIFIER_TOKEN);
    IoLexer_popPos(self);
    return 1;
}
// Reads an operator: a run of operator characters, emitted as an
// IDENTIFIER_TOKEN. Returns 1 on success and 0 (with the state restored)
// when the cursor is not on a readable character or no operator character
// follows.
int IoLexer_readOperator(IoLexer *self)
{
    uchar_t probe;

    IoLexer_pushPos(self);

    // Peek: make sure a character can be decoded here, then step back.
    probe = IoLexer_nextChar(self);

    if (probe == 0)
    {
        IoLexer_popPosBack(self);
        return 0;
    }

    IoLexer_prevChar(self);

    while (IoLexer_readOpChar(self))
    {
        // consume
    }

    if (!IoLexer_grabLength(self))
    {
        IoLexer_popPosBack(self);
        return 0;
    }

    IoLexer_grabTokenType_(self, IDENTIFIER_TOKEN);
    IoLexer_popPos(self);
    return 1;
}
// comments ------------------------------------------
// Reads one comment of any supported form, tried in this order:
// slash-star, slash-slash, pound. Returns 1 if one was read, 0 otherwise.
int IoLexer_readComment(IoLexer *self)
{
    if (IoLexer_readSlashStarComment(self))  return 1;
    if (IoLexer_readSlashSlashComment(self)) return 1;
    if (IoLexer_readPoundComment(self))      return 1;

    return 0;
}
// Reads a slash-star comment, which may contain nested slash-star
// comments. Returns 1 when a complete comment was skipped.
//
// Returns 0 with the state restored when the cursor is not on a comment
// opener, or when the input ends before the comment is closed. In the
// latter case the current token (if any) is marked with the error
// "unterminated comment".
int IoLexer_readSlashStarComment(IoLexer *self)
{
    IoLexer_pushPos(self);

    if (IoLexer_readString_(self, "/*"))
    {
        unsigned int nesting = 1;

        while (nesting > 0)
        {
            if (IoLexer_readString_(self, "/*"))
            {
                IoLexer_nextChar(self);
                nesting++;
            }
            else if (IoLexer_readString_(self, "*/"))
            {
                // otherwise we end up trimming the last char
                if (nesting > 1) IoLexer_nextChar(self);
                nesting--;
            }
            else if (IoLexer_nextChar(self) == 0)
            {
                // IoLexer_nextChar returns 0 without moving the cursor,
                // so this branch must make progress itself or the loop
                // would never end.
                if (IoLexer_onNULL(self))
                {
                    // end of input inside the comment
                    self->errorToken = IoLexer_currentToken(self);

                    if (self->errorToken)
                    {
                        IoToken_error_(self->errorToken, "unterminated comment");
                    }

                    IoLexer_popPosBack(self);
                    return 0;
                }

                // A byte that does not decode: it is only comment text,
                // so step over that single byte.
                self->current++;
            }
        }

        IoLexer_popPos(self);
        return 1;
    }

    IoLexer_popPosBack(self);
    return 0;
}
// Reads a comment that starts with two slashes and runs up to, but not
// including, the next newline. Returns 1 if one was read, otherwise
// restores the state and returns 0.
int IoLexer_readSlashSlashComment(IoLexer *self)
{
    IoLexer_pushPos(self);

    // the second character is only read when the first one matched
    if (IoLexer_nextChar(self) == '/' && IoLexer_nextChar(self) == '/')
    {
        while (IoLexer_readNonReturn(self))
        {
            // consume the rest of the line
        }

        IoLexer_popPos(self);
        return 1;
    }

    IoLexer_popPosBack(self);
    return 0;
}
// Reads a comment that starts with '#' and runs up to, but not including,
// the next newline. Returns 1 if one was read, otherwise restores the
// state and returns 0.
int IoLexer_readPoundComment(IoLexer *self)
{
    IoLexer_pushPos(self);

    if (IoLexer_nextChar(self) != '#')
    {
        IoLexer_popPosBack(self);
        return 0;
    }

    while (IoLexer_readNonReturn(self))
    {
        // consume the rest of the line
    }

    IoLexer_popPos(self);
    return 1;
}
// quotes -----------------------------------------
// Reads a quoted string. The triple-quoted form is tried first, then the
// single-quoted form. Returns 1 if one was read, 0 otherwise.
int IoLexer_readQuote(IoLexer *self)
{
    if (IoLexer_readTriQuote(self))
    {
        return 1;
    }

    return IoLexer_readMonoQuote(self) ? 1 : 0;
}
// Reads a string delimited by single double-quote characters and adds it,
// delimiters included, as a MONOQUOTE_TOKEN.
// Returns 1 on success. Returns 0 with the state restored if the cursor is
// not on a quote, or if no closing quote is found; in the second case the
// current token (if any) is marked with the error "unterminated quote".
int IoLexer_readMonoQuote(IoLexer *self)
{
int mbskip = 0; // multi-byte character length
IoLexer_pushPos(self);
if (IoLexer_nextChar(self) == '"')
{
for (;;)
{
uchar_t c = IoLexer_nextChar(self);
// NOTE(review): ismbchar/mbcharlen are not defined in this file; they
// presumably detect a multi-byte lead character and give its length.
// While mbskip is positive the characters read are skipped without being
// inspected (this includes a 0 returned by IoLexer_nextChar).
if (mbskip <= 0 && ismbchar(c))
{
mbskip = mbcharlen(c);
}
if (mbskip-- > 0)
{
continue;
}
if (c == '"')
{
break;
}
// A backslash escapes the following character, so an escaped quote does
// not end the string.
if (c == '\\')
{
IoLexer_nextChar(self);
continue;
}
// IoLexer_nextChar returns 0 at the end of input or on a sequence it
// cannot decode.
if (c == 0)
{
self->errorToken = IoLexer_currentToken(self);
if (self->errorToken)
{
IoToken_error_(self->errorToken, "unterminated quote");
}
IoLexer_popPosBack(self);
return 0;
}
}
IoLexer_grabTokenType_(self, MONOQUOTE_TOKEN);
IoLexer_popPos(self);
return 1;
}
IoLexer_popPosBack(self);
return 0;
}
// Reads a string delimited by three double-quote characters on each side
// and adds it, delimiters included, as a TRIQUOTE_TOKEN.
// Returns 1 on success. Returns 0 with the state restored if the opening
// delimiter is missing or the closing delimiter is never found.
int IoLexer_readTriQuote(IoLexer *self)
{
    IoLexer_pushPos(self);

    if (!IoLexer_readString_(self, "\"\"\""))
    {
        IoLexer_popPosBack(self);
        return 0;
    }

    for (;;)
    {
        if (IoLexer_readString_(self, "\"\"\""))
        {
            break;
        }

        // 0 means end of input or an undecodable sequence
        if (IoLexer_nextChar(self) == 0)
        {
            IoLexer_popPosBack(self);
            return 0;
        }
    }

    IoLexer_grabTokenType_(self, TRIQUOTE_TOKEN);
    IoLexer_popPos(self);
    return 1;
}
// helpers ----------------------------
// If the character at the cursor equals `c`, consumes it and adds it as a
// one-character token of `type`. Returns 1 on success, 0 (state restored)
// otherwise.
int IoLexer_readTokenChar_type_(IoLexer *self, char c, IoTokenType type)
{
    IoLexer_pushPos(self);

    if (!IoLexer_readChar_(self, c))
    {
        IoLexer_popPosBack(self);
        return 0;
    }

    IoLexer_grabTokenType_(self, type);
    IoLexer_popPos(self);
    return 1;
}
// If the text at the cursor starts with `s`, consumes it and adds it as an
// IDENTIFIER_TOKEN. Returns 1 on success, 0 (state restored) otherwise.
int IoLexer_readTokenString_(IoLexer *self, const char *s)
{
    IoLexer_pushPos(self);

    if (!IoLexer_readString_(self, s))
    {
        IoLexer_popPosBack(self);
        return 0;
    }

    IoLexer_grabTokenType_(self, IDENTIFIER_TOKEN);
    IoLexer_popPos(self);
    return 1;
}
// If the text at the cursor starts with `s`, moves the cursor past it and
// returns 1. Returns 0, leaving the cursor alone, when the cursor is at
// the end of input or the text does not match. No token is created.
int IoLexer_readString_(IoLexer *self, const char *s)
{
    int wanted = strlen(s);

    if (IoLexer_onNULL(self))
    {
        return 0;
    }

    if (strncmp(self->current, s, wanted) != 0)
    {
        return 0;
    }

    self->current += wanted;
    return 1;
}
// If the character at the cursor is an ASCII character contained in the
// string `s`, consumes it and returns 1. Otherwise leaves the cursor where
// it was and returns 0.
TEST_INLINE int IoLexer_readCharIn_(IoLexer *self, const char *s)
{
    if (!IoLexer_onNULL(self))
    {
        char *start = self->current;
        uchar_t c = IoLexer_nextChar(self);

        // c == 0 means the byte sequence could not be decoded and nothing
        // was consumed. It must be rejected explicitly: strchr(s, 0)
        // matches the terminator of s, which would report success without
        // making progress and let the callers' loops spin forever.
        if (c != 0 && c < 0x80 && strchr(s, (int)c))
        {
            return 1;
        }

        // Restore the saved position rather than stepping back one
        // character: after a failed decode the cursor has not moved, so
        // stepping back would rewind into already consumed input.
        self->current = start;
    }

    return 0;
}
// If the character at the cursor lies in the inclusive range
// [first, last], consumes it and returns 1. Otherwise leaves the cursor
// where it was and returns 0.
TEST_INLINE int IoLexer_readCharInRange_(IoLexer *self, uchar_t first, uchar_t last)
{
    if (!IoLexer_onNULL(self))
    {
        char *start = self->current;
        uchar_t c = IoLexer_nextChar(self);

        // c == 0 means nothing could be decoded (and nothing was consumed)
        if (c != 0 && c >= first && c <= last)
        {
            return 1;
        }

        // Restore the saved position rather than stepping back one
        // character: after a failed decode the cursor has not moved, and
        // stepping back would rewind onto the previous character, which
        // the caller would then read a second time.
        self->current = start;
    }

    return 0;
}
// If the character at the cursor equals `c`, consumes it and returns 1.
// Otherwise leaves the cursor where it was and returns 0.
int IoLexer_readChar_(IoLexer *self, char c)
{
    if (!IoLexer_onNULL(self))
    {
        char *start = self->current;
        uchar_t nc = IoLexer_nextChar(self);

        if (nc && nc == c)
        {
            return 1;
        }

        // Restore the saved position rather than stepping back one
        // character: when the decode failed the cursor has not moved, and
        // stepping back would rewind into already consumed input.
        self->current = start;
    }

    return 0;
}
// If the character at the cursor equals `c` ignoring ASCII case, consumes
// it and returns 1. Otherwise leaves the cursor where it was and returns 0.
int IoLexer_readCharAnyCase_(IoLexer *self, char c)
{
    if (!IoLexer_onNULL(self))
    {
        char *start = self->current;
        uchar_t nc = IoLexer_nextChar(self);

        // tolower is only defined for values representable as unsigned
        // char (or EOF), so decoded code points of 0x80 and above are
        // rejected up front and `c` is converted through unsigned char.
        if (nc != 0 && nc < 0x80 &&
            tolower((int)nc) == tolower((unsigned char)c))
        {
            return 1;
        }

        // Restore the saved position rather than stepping back one
        // character: when the decode failed the cursor has not moved.
        self->current = start;
    }

    return 0;
}
// If the character at the cursor decodes to a code point of 0x80 or above,
// consumes it and returns 1. Otherwise leaves the cursor where it was and
// returns 0.
int IoLexer_readNonASCIIChar_(IoLexer *self)
{
    if (!IoLexer_onNULL(self))
    {
        char *start = self->current;
        uchar_t nc = IoLexer_nextChar(self);

        if (nc >= 0x80)
        {
            return 1;
        }

        // Restore the saved position rather than stepping back one
        // character: when the decode failed (nc == 0) the cursor has not
        // moved, and stepping back would rewind onto the previous
        // character.
        self->current = start;
    }

    return 0;
}
// Consumes one character unless it is a newline or the end of input.
// Returns 1 if something was consumed, 0 otherwise (cursor unchanged).
int IoLexer_readNonReturn(IoLexer *self)
{
    char *start;

    if (IoLexer_onNULL(self)) return 0;

    start = self->current;

    if (IoLexer_nextChar(self) == '\n')
    {
        self->current = start;
        return 0;
    }

    // IoLexer_nextChar leaves the cursor in place when the bytes do not
    // decode. Such a byte is still "not a newline", so step over it as a
    // single byte; returning 1 without moving would make the callers'
    // while loops spin forever. The byte is known not to be the
    // terminator because of the check above.
    if (self->current == start)
    {
        self->current++;
    }

    return 1;
}
// Consumes one character unless it is a double quote or the end of input.
// Returns 1 if something was consumed, 0 otherwise (cursor unchanged).
int IoLexer_readNonQuote(IoLexer *self)
{
    char *start;

    if (IoLexer_onNULL(self)) return 0;

    start = self->current;

    if (IoLexer_nextChar(self) == '"')
    {
        self->current = start;
        return 0;
    }

    // IoLexer_nextChar leaves the cursor in place when the bytes do not
    // decode. Such a byte is still "not a quote", so step over it as a
    // single byte instead of reporting success without making progress.
    // The byte is known not to be the terminator because of the check
    // above.
    if (self->current == start)
    {
        self->current++;
    }

    return 1;
}
// character definitions ----------------------------
// Consumes a run of characters accepted by IoLexer_readCharacter.
// Returns 1 if at least one character was consumed, 0 otherwise.
int IoLexer_readCharacters(IoLexer *self)
{
    if (!IoLexer_readCharacter(self))
    {
        return 0;
    }

    while (IoLexer_readCharacter(self))
    {
        // consume
    }

    return 1;
}
// Consumes one character if it is a letter, a digit, a special character
// or an operator character (tried in that order).
// Returns 1 if a character was consumed, 0 otherwise.
int IoLexer_readCharacter(IoLexer *self)
{
    if (IoLexer_readLetter(self))      return 1;
    if (IoLexer_readDigit(self))       return 1;
    if (IoLexer_readSpecialChar(self)) return 1;
    if (IoLexer_readOpChar(self))      return 1;

    return 0;
}
// Consumes one operator character. The accepted set is listed in the
// string below. Returns 1 if a character was consumed, 0 otherwise.
int IoLexer_readOpChar(IoLexer *self)
{
    const char *opChars = ":'~!@$%^&*-+=|\\<>?/";

    return IoLexer_readCharIn_(self, opChars);
}
// Consumes one character from the file-level specialChars set.
// Returns 1 if a character was consumed, 0 otherwise.
int IoLexer_readSpecialChar(IoLexer *self)
{
    const char *accepted = specialChars;

    return IoLexer_readCharIn_(self, accepted);
}
// Consumes one decimal digit ('0' through '9').
// Returns 1 if a digit was consumed, 0 otherwise.
int IoLexer_readDigit(IoLexer *self)
{
    const uchar_t lowest = '0';
    const uchar_t highest = '9';

    return IoLexer_readCharInRange_(self, lowest, highest);
}
// Consumes one letter: 'A'-'Z', 'a'-'z', or any non-ASCII character.
// Returns 1 if a letter was consumed, 0 otherwise.
int IoLexer_readLetter(IoLexer *self)
{
    if (IoLexer_readCharInRange_(self, 'A', 'Z'))
    {
        return 1;
    }

    if (IoLexer_readCharInRange_(self, 'a', 'z'))
    {
        return 1;
    }

    return IoLexer_readNonASCIIChar_(self) != 0;
}
// terminator -------------------------------
// Reads a run of terminator characters (';' or newline), with separators
// allowed before, between and after them, and records it as a single ";"
// TERMINATOR_TOKEN. No token is added when the last token in the stream
// already is a terminator.
// Returns 1 if at least one terminator character was consumed; otherwise
// restores the state and returns 0.
int IoLexer_readTerminator(IoLexer *self)
{
    int terminated = 0;

    IoLexer_pushPos(self);
    IoLexer_readSeparator(self);

    while (IoLexer_readTerminatorChar(self))
    {
        terminated = 1;
        IoLexer_readSeparator(self);
    }

    if (terminated)
    {
        IoToken *top = IoLexer_currentToken(self);

        // avoid double terminators
        if (!(top && IoToken_type(top) == TERMINATOR_TOKEN))
        {
            IoLexer_addTokenString_length_type_(self, ";", 1, TERMINATOR_TOKEN);
        }

        // Every exit must undo the push made on entry, the duplicate
        // terminator case included. Leaving the entry behind would make
        // an enclosing reader pop it in place of its own, and a later
        // backtrack would restore the wrong position.
        IoLexer_popPos(self);
        return 1;
    }

    IoLexer_popPosBack(self);
    return 0;
}
// Consumes one terminator character: ';' or a newline.
// Returns 1 if a character was consumed, 0 otherwise.
int IoLexer_readTerminatorChar(IoLexer *self)
{
    const char *terminators = ";\n";

    return IoLexer_readCharIn_(self, terminators);
}
// separator --------------------------------
// Skips a run of separator characters without creating a token.
// Returns 1 if anything was skipped; otherwise restores the state and
// returns 0.
int IoLexer_readSeparator(IoLexer *self)
{
    IoLexer_pushPos(self);

    while (IoLexer_readSeparatorChar(self))
    {
        // consume
    }

    if (!IoLexer_grabLength(self))
    {
        IoLexer_popPosBack(self);
        return 0;
    }

    IoLexer_popPos(self);
    return 1;
}
// Consumes one separator: a space, form feed, carriage return, tab or
// vertical tab, or a backslash immediately followed by a newline (a line
// continuation). Returns 1 if a separator was consumed, otherwise leaves
// the cursor where it was and returns 0.
int IoLexer_readSeparatorChar(IoLexer *self)
{
    char *start;

    if (IoLexer_readCharIn_(self, " \f\r\t\v"))
    {
        return 1;
    }

    start = self->current;

    if (IoLexer_readCharIn_(self, "\\") && IoLexer_readCharIn_(self, "\n"))
    {
        return 1;
    }

    // A backslash that is not followed by a newline is not a separator.
    // Give it back, otherwise the caller sees consumed input and treats
    // the lone backslash as separator text, silently dropping it.
    self->current = start;
    return 0;
}
// whitespace -----------------------------------
// Skips a run of whitespace characters without creating a token.
// Returns 1 if anything was skipped; otherwise restores the state and
// returns 0.
int IoLexer_readWhitespace(IoLexer *self)
{
    IoLexer_pushPos(self);

    while (IoLexer_readWhitespaceChar(self))
    {
        // consume
    }

    if (!IoLexer_grabLength(self))
    {
        IoLexer_popPosBack(self);
        return 0;
    }

    IoLexer_popPos(self);
    return 1;
}
// Consumes one whitespace character: space, form feed, carriage return,
// tab, vertical tab or newline.
// Returns 1 if a character was consumed, 0 otherwise.
int IoLexer_readWhitespaceChar(IoLexer *self)
{
    const char *blanks = " \f\r\t\v\n";

    return IoLexer_readCharIn_(self, blanks);
}
// Consumes a run of decimal digits without creating a token.
// Returns 1 if at least one digit was consumed; otherwise restores the
// state and returns 0.
int IoLexer_readDigits(IoLexer *self)
{
    IoLexer_pushPos(self);

    if (!IoLexer_readDigit(self))
    {
        IoLexer_popPosBack(self);
        return 0;
    }

    while (IoLexer_readDigit(self))
    {
        // consume
    }

    IoLexer_popPos(self);
    return 1;
}
// Reads a numeric literal. The hexadecimal form is tried first, then the
// decimal form. Returns 1 if a number token was read, 0 otherwise.
int IoLexer_readNumber(IoLexer *self)
{
    if (IoLexer_readHexNumber(self))
    {
        return 1;
    }

    return IoLexer_readDecimal(self) ? 1 : 0;
}
// Reads the exponent part of a number: 'e' or 'E', an optional '-', then
// digits.
// Returns 1 if an exponent was read, 0 if there is no 'e' at the cursor,
// and -1 if an 'e' was consumed but no digits follow. In the -1 case the
// consumed characters are not given back; the caller has to backtrack.
int IoLexer_readExponent(IoLexer *self)
{
    if (!IoLexer_readCharAnyCase_(self, 'e'))
    {
        return 0;
    }

    IoLexer_readChar_(self, '-'); // the sign is optional

    return IoLexer_readDigits(self) ? 1 : -1;
}
// Reads the fractional part of a number: '.' followed by digits.
// Returns 1 if a fraction was read, 0 if there is no '.' at the cursor,
// and -1 if a '.' was consumed but no digits follow. In the -1 case the
// '.' is not given back; the caller has to backtrack.
int IoLexer_readDecimalPlaces(IoLexer *self)
{
    if (!IoLexer_readChar_(self, '.'))
    {
        return 0;
    }

    return IoLexer_readDigits(self) ? 1 : -1;
}
// Reads a decimal literal and adds it as a NUMBER_TOKEN. Accepted shapes:
// digits, digits followed by a fraction, or a fraction alone; each may be
// followed by an exponent.
// Returns 1 on success; otherwise restores the state and returns 0.
int IoLexer_readDecimal(IoLexer *self)
{
    int hasIntegerPart;
    int fraction;
    int mantissaOk;

    IoLexer_pushPos(self);

    hasIntegerPart = IoLexer_readDigits(self);
    fraction = IoLexer_readDecimalPlaces(self);

    // With leading digits the fraction is optional but must not be
    // malformed; without them the fraction is required.
    mantissaOk = hasIntegerPart ? (fraction != -1) : (fraction == 1);

    if (mantissaOk &&
        IoLexer_readExponent(self) != -1 &&
        IoLexer_grabLength(self))
    {
        IoLexer_grabTokenType_(self, NUMBER_TOKEN);
        IoLexer_popPos(self);
        return 1;
    }

    IoLexer_popPosBack(self);
    return 0;
}
// Reads a literal that starts with "0x" or "0X" and adds it as a
// HEXNUMBER_TOKEN. After the prefix, any run of digits and characters
// accepted by IoLexer_readCharacters is taken; the digits are not checked
// for being valid hexadecimal here.
// Returns 1 on success; otherwise restores the state and returns 0.
int IoLexer_readHexNumber(IoLexer *self)
{
    int chunks = 0;

    IoLexer_pushPos(self);

    if (IoLexer_readChar_(self, '0') && IoLexer_readCharAnyCase_(self, 'x'))
    {
        while (IoLexer_readDigits(self) || IoLexer_readCharacters(self))
        {
            chunks ++;
        }
    }

    // the prefix alone, with nothing after it, is not a number
    if (!(chunks && IoLexer_grabLength(self)))
    {
        IoLexer_popPosBack(self);
        return 0;
    }

    IoLexer_grabTokenType_(self, HEXNUMBER_TOKEN);
    IoLexer_popPos(self);
    return 1;
}
// syntax highlighted by Code2HTML, v. 0.9.1