#include "Scanner.hpp"
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "Exception.hpp"
extern string fm_reserved[];
extern int fm_reserved_count;
// Returns true when the character is alphanumeric (as judged by isalnum)
// or is an underscore.
bool isalnumus(byte a) {
  if (a == '_')
    return true;
  return (isalnum(a) != 0);
}
// Returns true for a space, a tab or a carriage return.  A newline is
// not treated as a blank.
bool isablank(byte a) {
  switch (a) {
  case ' ':
  case '\t':
  case '\r':
    return true;
  default:
    return false;
  }
}
// Packs the current scan position into a single value: the character
// offset (m_ptr) shifted left by 16 bits, OR-ed with the line number.
unsigned Scanner::ContextNum() {
  return ((m_ptr << 16) | m_linenumber);
}
// Stores a new current token built from the token code, the packed
// position (offset << 16 | line number) at the time of the call, and
// the supplied text.
void Scanner::SetToken(byte tok, string text) {
  m_tok = Token(tok, (m_ptr << 16) | m_linenumber, text);
}
// Returns true once the scan position has reached or passed the end of
// the text.
bool Scanner::Done() {
  return !(m_ptr < m_text.size());
}
// Returns true when the character `chars` positions past the current
// one equals `tok`.
bool Scanner::Peek(int chars, byte tok) {
  return (tok == ahead(chars));
}
// Builds a scanner over the text in buf.  fname is stored as the file
// name that Context() reports in its messages.
Scanner::Scanner(string buf, string fname) {
  // Source text and where it came from
  m_text = buf;
  m_strlen = buf.size();
  m_filename = fname;

  // Scanning starts at the first character of line 1
  m_ptr = 0;
  m_linenumber = 1;

  // No token has been fetched yet, and no special state is active
  m_tokValid = false;
  m_inContinuationState = false;
  m_bracketDepth = 0;
  m_debugFlag = false;
  m_blobFlag = false;

  // Whitespace is ignored until a caller pushes a different flag
  m_ignorews.push(true);
}
// Handles a "..." continuation marker: skips the three dots and the
// remainder of the line (including its newline, if present), then
// records that the scanner is in the continuation state.
void Scanner::FetchContinuation() {
  // Step over the "..." that Fetch() recognized
  m_ptr += 3;
  // Discard everything up to the end of the line or the end of input
  for (; (current() != '\n') && (m_ptr < m_strlen); m_ptr++) {
  }
  // Swallow the newline itself and account for the new line
  if (current() == '\n') {
    m_linenumber++;
    m_ptr++;
  }
  m_inContinuationState = true;
}
// Scans one lexical element starting at the current position.
//
// Comments, "..." continuations and (when the top whitespace flag says
// to ignore it) whitespace are consumed without marking the token as
// valid, so the caller has to call Fetch() again.  In every other case
// a token is set and m_tokValid becomes true.
void Scanner::Fetch() {
  // End of input
  if (m_ptr >= m_strlen) {
    SetToken(TOK_EOF);
    m_tokValid = true;
    return;
  }
  // Comment: skipped, no token produced
  if (current() == '%') {
    FetchComment();
    return;
  }
  // Line continuation: skipped, no token produced
  const bool ellipsis =
    (current() == '.') && (ahead(1) == '.') && (ahead(2) == '.');
  if (ellipsis) {
    FetchContinuation();
    return;
  }
  // In blob mode anything that is not a separator, a quote or a
  // comment start begins a blob
  const bool blobStart =
    m_blobFlag && !isablank(current()) &&
    (current() != '\n') && (current() != ';') &&
    (current() != ',') && (current() != '\'') &&
    (current() != '%');
  if (blobStart) {
    FetchBlob();
  } else if (isalpha(current())) {
    FetchIdentifier();
  } else if (isdigit(current()) ||
             ((current() == '.') && isdigit(ahead(1)))) {
    FetchNumber();
  } else if (isablank(current())) {
    FetchWhitespace();
    // Ignored whitespace does not count as a token
    if (m_ignorews.top())
      return;
  } else if ((current() == '\'') &&
             (previous() != '\'') &&
             (previous() != ')') &&
             (previous() != ']') &&
             (previous() != '}') &&
             !isalnumus(previous())) {
    // A quote that does not directly follow a quote, a closing
    // bracket or an identifier/number character opens a string;
    // otherwise it is left to FetchOther()
    FetchString();
  } else {
    FetchOther();
  }
  m_tokValid = true;
}
// Tries to match the two-character operator `op` at the current
// position.  On a match the token `tok` is set, both characters are
// consumed and true is returned; otherwise nothing changes and false is
// returned.
bool Scanner::TryFetchBinary(const char* op, byte tok) {
  const bool matches = (current() == op[0]) && (ahead(1) == op[1]);
  if (!matches)
    return false;
  SetToken(tok);
  m_ptr += 2;
  return true;
}
// Skips a comment: advances to the next newline (which is left
// unconsumed) or to the end of the input.
void Scanner::FetchComment() {
  for (; (current() != '\n') && (m_ptr < m_strlen); m_ptr++) {
  }
}
// Scans an operator or punctuation token.
//
// Two-character operators (the dotted forms, comparisons and the
// short-circuit logicals) are tried first.  If none matches, the single
// character at the current position becomes the token, using its own
// character code as the token code.  Opening and closing square/curly
// brackets also update the bracket nesting depth.
void Scanner::FetchOther() {
  if (current() == '.') {
    if (TryFetchBinary(".*",TOK_DOTTIMES)) return;
    if (TryFetchBinary("./",TOK_DOTRDIV)) return;
    if (TryFetchBinary(".\\",TOK_DOTLDIV)) return;
    if (TryFetchBinary(".^",TOK_DOTPOWER)) return;
    if (TryFetchBinary(".'",TOK_DOTTRANSPOSE)) return;
  }
  if (TryFetchBinary("<=",TOK_LE)) return;
  if (TryFetchBinary(">=",TOK_GE)) return;
  if (TryFetchBinary("==",TOK_EQ)) return;
  if (TryFetchBinary("~=",TOK_NE)) return;
  if (TryFetchBinary("&&",TOK_SAND)) return;
  if (TryFetchBinary("||",TOK_SOR)) return;
  const char ch = m_text[m_ptr];
  SetToken(ch);
  if ((ch == '[') || (ch == '{')) {
    m_bracketDepth++;
  } else if ((ch == ']') || (ch == '}')) {
    // Decrement but never go below zero.  The previous min(0, depth-1)
    // forced the depth to zero or below on every closing bracket, which
    // lost track of nested brackets and let an unmatched closer drive
    // the depth negative.
    if (m_bracketDepth > 0)
      m_bracketDepth--;
  }
  m_ptr++;
}
void Scanner::FetchString() {
int len = 0;
// We want to advance, but skip double quotes
// while ((next() != ') || ((next() == ') && (next(2) == ')) && (next() != '\n')
while (((ahead(len+1) != '\'') ||
((ahead(len+1) == '\'') && (ahead(len+2) == '\''))) &&
(ahead(len+1) != '\n')) {
if ((ahead(len+1) == '\'') &&
(ahead(len+2) == '\'')) len+=2;
else
len++;
}
if (ahead(len+1) == '\n')
throw Exception("unterminated string" + Context());
string ret(m_text,m_ptr+1,len);
string::size_type ndx = ret.find("''");
while (ndx != string::npos) {
ret.erase(ndx,1);
ndx = ret.find("''");
}
SetToken(TOK_STRING,ret);
m_ptr += len+2;
}
// Consumes a run of blank characters (as defined by isablank) and sets
// a single TOK_SPACE token for it.
void Scanner::FetchWhitespace() {
  int count = 0;
  for (; isablank(ahead(count)); count++) {
  }
  SetToken(TOK_SPACE);
  m_ptr += count;
}
// A number consists of something like:
//   {integer} . {integer} E {sign} {integer}
//      s1     s2    s3    s4   s5      s6
// or, with the leading integer omitted:
//   . {integer} E {sign} {integer}
//
// That is, one of:
//   <Float><Exponent>
//   <Float>
//   <Integer>
//
// flags - int, float, double, complex
//
// Classification of a numeric literal, as decided by
// Scanner::FetchNumber.
enum number_class {
  integer_class,   // digits only
  float_class,     // literal with an 'f'/'F' suffix
  double_class,    // has a '.', an exponent, or a 'd'/'D' suffix
  complex_class,   // float literal followed by i/I/j/J
  dcomplex_class   // any other literal followed by i/I/j/J
};
void Scanner::FetchNumber() {
int len = 0;
int lookahead = 0;
number_class numclass;
numclass = integer_class;
while (isdigit(ahead(len))) len++;
lookahead = len;
if (ahead(lookahead) == '.') {
numclass = double_class;
lookahead++;
len = 0;
while (isdigit(ahead(len+lookahead))) len++;
lookahead+=len;
}
if ((ahead(lookahead) == 'E') ||
(ahead(lookahead) == 'e')) {
numclass = double_class;
lookahead++;
if ((ahead(lookahead) == '+') ||
(ahead(lookahead) == '-')) {
lookahead++;
}
len = 0;
while (isdigit(ahead(len+lookahead))) len++;
lookahead+=len;
}
if ((ahead(lookahead) == 'f') ||
(ahead(lookahead) == 'F')) {
numclass = float_class;
lookahead++;
}
if ((ahead(lookahead) == 'd') ||
(ahead(lookahead) == 'D')) {
numclass = double_class;
lookahead++;
}
// Recognize the complex constants, but strip the "i" off
if ((ahead(lookahead) == 'i') ||
(ahead(lookahead) == 'I') ||
(ahead(lookahead) == 'j') ||
(ahead(lookahead) == 'J')) {
numclass = (numclass == float_class) ? complex_class : dcomplex_class;
}
// Back off if we aggregated a "." from "..." into the number
if (((ahead(lookahead-1) == '.') &&
(ahead(lookahead) == '.') &&
(ahead(lookahead+1) == '.') &&
(ahead(lookahead+2) != '.')) ||
((ahead(lookahead-1) == '.') &&
((ahead(lookahead) == '*') ||
(ahead(lookahead) == '/') ||
(ahead(lookahead) == '\\') ||
(ahead(lookahead) == '^') ||
(ahead(lookahead) == '\'')))) lookahead--;
string numtext(string(m_text,m_ptr,lookahead));
m_ptr += lookahead;
if ((numclass == complex_class) ||
(numclass == dcomplex_class))
m_ptr++;
switch (numclass) {
case integer_class:
SetToken(TOK_INTEGER,numtext);
return;
case float_class:
SetToken(TOK_FLOAT,numtext);
return;
case double_class:
SetToken(TOK_DOUBLE,numtext);
return;
case complex_class:
SetToken(TOK_COMPLEX,numtext);
return;
case dcomplex_class:
SetToken(TOK_DCOMPLEX,numtext);
return;
}
}
void Scanner::FetchIdentifier() {
int len = 0;
while (isalnumus(ahead(len))) len++;
// Collect the identifier into a string
string ident(string(m_text,m_ptr,len));
string *p = lower_bound(fm_reserved,fm_reserved+fm_reserved_count,ident);
if ((p!= fm_reserved+fm_reserved_count) && (*p == ident))
SetToken(TOK_KEYWORD+(p-fm_reserved)+1);
else
SetToken(TOK_IDENT,string(m_text,m_ptr,len));
m_ptr += len;
}
// A Blob is either:
// 1. A regular string (with quote delimiters)
// 2. A sequence of characters with either a whitespace
// a comma or a colon.
void Scanner::FetchBlob() {
if (current() == '\'') {
FetchString();
m_tokValid = true;
} else {
int len = 0;
while ((ahead(len) != '\n') && (!isablank(ahead(len))) &&
(ahead(len) != '%') && (ahead(len) != ',') &&
(ahead(len) != ';')) len++;
if (len > 0) {
SetToken(TOK_STRING,string(m_text,m_ptr,len));
m_ptr += len;
m_tokValid = true;
}
}
}
// Returns the current token, fetching one first if none is valid.
//
// Fetch() is repeated until it yields a valid token (it yields none for
// comments, continuations and ignored whitespace).  After each fetch
// the line counter is advanced if the scan position now rests on a
// newline.  Any valid token other than TOK_EOF ends the continuation
// state.
const Token& Scanner::Next() {
  while (!m_tokValid) {
    Fetch();
    if (m_debugFlag && m_tokValid)
      cout << m_tok;
    // current() yields 0 past the end of the text, so this only fires
    // on a real newline character
    if (current() == '\n')
      m_linenumber++;
  }
  // The loop above only exits with a valid token
  if (m_inContinuationState && !m_tok.Is(TOK_EOF))
    m_inContinuationState = false;
  return m_tok;
}
// Reports whether the scanner is in the continuation state, i.e. a
// "..." continuation has been consumed and no non-EOF token has been
// returned by Next() since.
bool Scanner::InContinuationState() {
  const bool continuing = m_inContinuationState;
  return continuing;
}
// Reports whether the bracket nesting depth is currently positive.
bool Scanner::InBracket() {
  return (0 < m_bracketDepth);
}
// Marks the current token as used up, so that the following call to
// Next() fetches a new one.
void Scanner::Consume() {
  m_tokValid = false;
}
// Returns the character at the scan position, or 0 when the position is
// at or past the end of the text.
byte Scanner::current() {
  if (!(m_ptr < m_strlen))
    return 0;
  return m_text.at(m_ptr);
}
// Returns the character just before the scan position, or 0 when the
// scan position is at the very start of the text.
byte Scanner::previous() {
  if (!m_ptr)
    return 0;
  return m_text.at(m_ptr-1);
}
// Pushes a new ignore-whitespace flag.  While the top flag is true,
// Fetch() consumes whitespace without producing a valid token.
void Scanner::PushWSFlag(bool ignoreWS) {
  m_ignorews.push(ignoreWS);
}
// Removes the most recently pushed ignore-whitespace flag, restoring
// the one beneath it.
void Scanner::PopWSFlag() {
  m_ignorews.pop();
}
// Returns the character n positions past the scan position, or 0 when
// that position lies outside the text.
byte Scanner::ahead(int n) {
  if ((m_ptr+n) < m_text.size())
    return m_text.at(m_ptr+n);
  return 0;
}
// Returns the context message for the current scan position.
string Scanner::Context() {
  const unsigned here = ContextNum();
  return Context(here);
}
// Returns the decimal text of an unsigned number.
string stringFromNumber(unsigned line) {
  // Large enough for the decimal digits of any unsigned value plus the
  // terminating NUL
  char buffer[32];
  // %u matches the unsigned argument; %d would print values above
  // INT_MAX as negative numbers
  sprintf(buffer,"%u",line);
  return string(buffer);
}
// Returns the text between two packed positions, both ends included.
// Only the character offsets (the bits above the low 16) of pos1 and
// pos2 are used.
string Scanner::Snippet(unsigned pos1, unsigned pos2) {
  const unsigned first = pos1 >> 16;
  const unsigned last = pos2 >> 16;
  return m_text.substr(first,last-first+1);
}
// Builds a context message for a packed position.
//
// Only the character offset (pos >> 16) is used.  The message holds,
// when a file name is known, the line number and the file name; then
// the line before the one containing the offset, the line containing
// it, and a '^' marker placed under the position.
string Scanner::Context(unsigned pos) {
  pos = pos >> 16;
  // Walk the text line by line until the line holding pos is reached,
  // remembering the line before it
  string::size_type line_start = 0;
  string::size_type line_stop = m_text.find("\n");
  int linenumber = 1;
  string prevline;
  for (; pos > line_stop; linenumber++) {
    prevline = m_text.substr(line_start,line_stop-line_start);
    line_start = line_stop+1;
    line_stop = m_text.find("\n",line_start);
  }
  string retstring;
  if (!m_filename.empty()) {
    retstring = " at line number: " + stringFromNumber(linenumber);
    retstring += " of file " + m_filename + "\n";
  } else {
    retstring += "\n";
  }
  retstring += " ";
  retstring += prevline;
  retstring += "\n";
  retstring += " ";
  retstring += m_text.substr(line_start,line_stop-line_start);
  // Column of the marker within the line, never negative
  int offset = pos-line_start-1;
  if (offset < 0) offset = 0;
  retstring += "\n ";
  retstring += string(offset,' ');
  retstring += "^";
  return retstring;
}
// syntax highlighted by Code2HTML, v. 0.9.1