#include "Scanner.hpp"
// NOTE(review): the names of the four system headers were missing from this
// copy of the file. The ones below cover what this file uses (cout, sprintf,
// isalnum/isalpha/isdigit, lower_bound) -- confirm against the original.
#include <iostream>
#include <stdio.h>
#include <ctype.h>
#include <algorithm>
#include "Exception.hpp"

// Table of reserved words and its length, defined elsewhere.
// FetchIdentifier() binary-searches it, so it is assumed to be sorted
// in ascending order -- TODO confirm at the definition site.
extern string fm_reserved[];
extern int fm_reserved_count;

// True if the character is alphanumeric or an underscore.
bool isalnumus(byte a) {
  return (isalnum(a) || (a=='_'));
}

// True if the character is a space, a tab or a carriage return.
// A newline is deliberately NOT a blank: it is scanned as its own token.
bool isablank(byte a) {
  return (a==' ' || a=='\t' || a=='\r');
}

// Packs the current scan position into a single number:
// text offset in the bits above bit 16, line number in the low bits.
unsigned Scanner::ContextNum() {
  return (m_ptr << 16 | m_linenumber);
}

// Makes (tok, text) the current token, stamped with the current position.
// Does not advance the scan pointer and does not mark the token as valid.
void Scanner::SetToken(byte tok, string text) {
  m_tok = Token(tok,ContextNum(),text);
}

// True once the scan pointer has reached the end of the text.
bool Scanner::Done() {
  return (m_ptr >= m_text.size());
}

// True if the character 'chars' positions ahead of the scan pointer is 'tok'.
bool Scanner::Peek(int chars, byte tok) {
  return (ahead(chars) == tok);
}

// Builds a scanner over the text in 'buf'. 'fname' is only used when
// composing context messages (see Context()).
Scanner::Scanner(string buf, string fname) {
  m_text = buf;
  m_filename = fname;
  m_ptr = 0;
  m_linenumber = 1;
  m_tokValid = false;
  m_inContinuationState = false;
  m_bracketDepth = 0;
  m_strlen = buf.size();
  // Whitespace is ignored by default; PushWSFlag()/PopWSFlag() override this.
  m_ignorews.push(true);
  m_debugFlag = false;
  m_blobFlag = false;
}

// Handles a "..." continuation: skips the three dots and the rest of the
// line, including its newline, and records that a continuation was seen.
void Scanner::FetchContinuation() {
  m_ptr += 3;
  while ((current() != '\n') && (m_ptr < m_strlen))
    m_ptr++;
  if (current() == '\n') {
    m_linenumber++;
    m_ptr++;
  }
  m_inContinuationState = true;
}

// Scans one lexical element starting at the scan pointer.
// Comments, continuations and (when whitespace is being ignored) blanks are
// consumed WITHOUT producing a token: the function returns with m_tokValid
// untouched so that Next() calls it again. Every other path sets a token
// and marks it valid.
void Scanner::Fetch() {
  if (m_ptr >= m_strlen)
    SetToken(TOK_EOF);
  else if (current() == '%') {
    FetchComment();
    return;
  } else if ((current() == '.') && (ahead(1) == '.') && (ahead(2) == '.')) {
    FetchContinuation();
    return;
  } else if (m_blobFlag && !isablank(current()) &&
             (current() != '\n') && (current() != ';') &&
             (current() != ',') && (current() != '\'') &&
             (current() != '%'))
    FetchBlob();
  else if (isalpha(current()))
    FetchIdentifier();
  else if (isdigit(current()) || ((current() == '.') && isdigit(ahead(1))))
    FetchNumber();
  else if (isablank(current())) {
    FetchWhitespace();
    if (m_ignorews.top()) return;
  } else if ((current() == '\'') && !((previous() == '\'') ||
                                      (previous() == ')') ||
                                      (previous() == ']') ||
                                      (previous() == '}') ||
                                      (isalnumus(previous())))) {
    // A quote opens a string only when it does not directly follow a
    // quote, a closing bracket or an identifier/number character; in those
    // cases it is left to FetchOther() as a single-character token
    // (presumably the transpose operator).
    FetchString();
  } else
    FetchOther();
  m_tokValid = true;
}

// If the next two characters are op[0] op[1], emits 'tok', consumes both
// characters and returns true. Otherwise leaves the scanner untouched and
// returns false. 'op' must point to at least two characters.
bool Scanner::TryFetchBinary(const char* op, byte tok) {
  if ((current() == op[0]) && (ahead(1) == op[1])) {
    SetToken(tok);
    m_ptr += 2;
    return true;
  }
  return false;
}

// Skips to the end of the current line. The newline itself is not consumed.
void Scanner::FetchComment() {
  while ((current() != '\n') && (m_ptr < m_strlen))
    m_ptr++;
}

// Scans operators and punctuation. Two-character operators are tried first;
// anything else becomes a single-character token whose code is the
// character itself. Also tracks the nesting depth of [] and {}.
void Scanner::FetchOther() {
  if (current() == '.') {
    if (TryFetchBinary(".*",TOK_DOTTIMES)) return;
    if (TryFetchBinary("./",TOK_DOTRDIV)) return;
    if (TryFetchBinary(".\\",TOK_DOTLDIV)) return;
    if (TryFetchBinary(".^",TOK_DOTPOWER)) return;
    if (TryFetchBinary(".'",TOK_DOTTRANSPOSE)) return;
  }
  if (TryFetchBinary("<=",TOK_LE)) return;
  if (TryFetchBinary(">=",TOK_GE)) return;
  if (TryFetchBinary("==",TOK_EQ)) return;
  if (TryFetchBinary("~=",TOK_NE)) return;
  if (TryFetchBinary("&&",TOK_SAND)) return;
  if (TryFetchBinary("||",TOK_SOR)) return;
  const byte ch = m_text[m_ptr];
  SetToken(ch);
  if ((ch == '[') || (ch == '{'))
    m_bracketDepth++;
  // Closing a bracket lowers the depth but never below zero. (The previous
  // min(0,depth-1) reset the depth to 0 on the first closer of a nested
  // group and went negative on an unmatched one, which broke InBracket().)
  if (((ch == ']') || (ch == '}')) && (m_bracketDepth > 0))
    m_bracketDepth--;
  m_ptr++;
}

// Scans a single-quoted string starting at the opening quote. Inside the
// string a doubled quote ('') stands for one literal quote. The token text
// is the string contents with the delimiters removed and doubled quotes
// collapsed.
// Throws Exception("unterminated string" ...) if a newline or the end of
// the text is reached before the closing quote.
void Scanner::FetchString() {
  int len = 0;
  // 'len' counts the raw characters between the delimiters.
  while (true) {
    // Running off the end of the text must stop the scan: ahead() keeps
    // returning 0 there, which would otherwise never match a terminator.
    if ((m_ptr+len+1) >= m_strlen)
      throw Exception("unterminated string" + Context());
    const byte ch = ahead(len+1);
    if (ch == '\n')
      throw Exception("unterminated string" + Context());
    if (ch == '\'') {
      if (ahead(len+2) != '\'')
        break;          // a lone quote closes the string
      len += 2;         // a doubled quote is part of the contents
    } else
      len++;
  }
  string ret(m_text,m_ptr+1,len);
  // Collapse each '' into a single '. The search resumes AFTER the quote
  // that was kept, so that '''' yields two quotes rather than one.
  string::size_type ndx = ret.find("''");
  while (ndx != string::npos) {
    ret.erase(ndx,1);
    ndx = ret.find("''",ndx+1);
  }
  SetToken(TOK_STRING,ret);
  m_ptr += len+2;
}

// Consumes a run of blanks (see isablank) and emits a single TOK_SPACE.
void Scanner::FetchWhitespace() {
  int len = 0;
  while (isablank(ahead(len))) len++;
  SetToken(TOK_SPACE);
  m_ptr += len;
}

// A number looks like one of:
//
//   {integer}.{integer}E{sign}{integer}
//            .{integer}E{sign}{integer}
//
// optionally followed by a precision suffix (f/F or d/D) and then by an
// imaginary-unit suffix (i/I/j/J).
//
// Classification of the scanned literal: int, float, double, complex.
typedef enum {
  integer_class,
  float_class,
  double_class,
  complex_class,
  dcomplex_class
} number_class;

// Scans a numeric literal and emits TOK_INTEGER, TOK_FLOAT, TOK_DOUBLE,
// TOK_COMPLEX or TOK_DCOMPLEX. The token text holds the literal including
// any f/d suffix but without the imaginary-unit suffix.
void Scanner::FetchNumber() {
  int len = 0;
  int lookahead = 0;
  number_class numclass;

  numclass = integer_class;
  // Integer part.
  while (isdigit(ahead(len))) len++;
  lookahead = len;
  // Fractional part.
  if (ahead(lookahead) == '.') {
    numclass = double_class;
    lookahead++;
    len = 0;
    while (isdigit(ahead(len+lookahead))) len++;
    lookahead+=len;
  }
  // Exponent with optional sign.
  if ((ahead(lookahead) == 'E') ||
      (ahead(lookahead) == 'e')) {
    numclass = double_class;
    lookahead++;
    if ((ahead(lookahead) == '+') ||
        (ahead(lookahead) == '-')) {
      lookahead++;
    }
    len = 0;
    while (isdigit(ahead(len+lookahead))) len++;
    lookahead+=len;
  }
  // Precision suffixes.
  if ((ahead(lookahead) == 'f') ||
      (ahead(lookahead) == 'F')) {
    numclass = float_class;
    lookahead++;
  }
  if ((ahead(lookahead) == 'd') ||
      (ahead(lookahead) == 'D')) {
    numclass = double_class;
    lookahead++;
  }
  // Recognize the complex constants, but strip the "i" off: the suffix is
  // not counted in 'lookahead', it is skipped separately further down.
  if ((ahead(lookahead) == 'i') ||
      (ahead(lookahead) == 'I') ||
      (ahead(lookahead) == 'j') ||
      (ahead(lookahead) == 'J')) {
    numclass = (numclass == float_class) ? complex_class : dcomplex_class;
  }
  // Back off if we aggregated a "." into the number that really belongs to
  // a following "..." continuation or to a dot-operator (.* ./ .\ .^ .').
  if (((ahead(lookahead-1) == '.') &&
       (ahead(lookahead) == '.') &&
       (ahead(lookahead+1) == '.') &&
       (ahead(lookahead+2) != '.')) ||
      ((ahead(lookahead-1) == '.') &&
       ((ahead(lookahead) == '*') ||
        (ahead(lookahead) == '/') ||
        (ahead(lookahead) == '\\') ||
        (ahead(lookahead) == '^') ||
        (ahead(lookahead) == '\''))))
    lookahead--;
  string numtext(string(m_text,m_ptr,lookahead));
  m_ptr += lookahead;
  // Step over the imaginary-unit suffix, which is not part of the text.
  if ((numclass == complex_class) ||
      (numclass == dcomplex_class))
    m_ptr++;
  switch (numclass) {
  case integer_class:
    SetToken(TOK_INTEGER,numtext);
    return;
  case float_class:
    SetToken(TOK_FLOAT,numtext);
    return;
  case double_class:
    SetToken(TOK_DOUBLE,numtext);
    return;
  case complex_class:
    SetToken(TOK_COMPLEX,numtext);
    return;
  case dcomplex_class:
    SetToken(TOK_DCOMPLEX,numtext);
    return;
  }
}

// Scans a run of alphanumerics/underscores. If the text matches an entry of
// fm_reserved, a keyword token is emitted whose code is TOK_KEYWORD plus the
// 1-based index of the entry; otherwise TOK_IDENT with the text is emitted.
void Scanner::FetchIdentifier() {
  int len = 0;
  while (isalnumus(ahead(len))) len++;
  // Collect the identifier into a string
  string ident(string(m_text,m_ptr,len));
  string *p = lower_bound(fm_reserved,fm_reserved+fm_reserved_count,ident);
  if ((p != fm_reserved+fm_reserved_count) && (*p == ident))
    SetToken(TOK_KEYWORD+(p-fm_reserved)+1);
  else
    SetToken(TOK_IDENT,ident);
  m_ptr += len;
}

// A Blob is either:
//   1. A regular string (with quote delimiters)
//   2. A sequence of characters ended by a blank, a newline, a comma,
//      a semicolon or a '%'.
void Scanner::FetchBlob() { if (current() == '\'') { FetchString(); m_tokValid = true; } else { int len = 0; while ((ahead(len) != '\n') && (!isablank(ahead(len))) && (ahead(len) != '%') && (ahead(len) != ',') && (ahead(len) != ';')) len++; if (len > 0) { SetToken(TOK_STRING,string(m_text,m_ptr,len)); m_ptr += len; m_tokValid = true; } } } const Token& Scanner::Next() { while (!m_tokValid) { Fetch(); if (m_tokValid && m_debugFlag) cout << m_tok; if ((m_ptr < m_strlen) && (current() == '\n')) m_linenumber++; } if (m_inContinuationState && m_tokValid && !m_tok.Is(TOK_EOF)) m_inContinuationState = false; return m_tok; } bool Scanner::InContinuationState() { return m_inContinuationState; } bool Scanner::InBracket() { return (m_bracketDepth>0); } void Scanner::Consume() { m_tokValid = false; } byte Scanner::current() { if (m_ptr < m_strlen) return m_text.at(m_ptr); else return 0; } byte Scanner::previous() { if (m_ptr) return m_text.at(m_ptr-1); else return 0; } void Scanner::PushWSFlag(bool ignoreWS) { m_ignorews.push(ignoreWS); } void Scanner::PopWSFlag() { m_ignorews.pop(); } byte Scanner::ahead(int n) { if ((m_ptr+n) >= m_text.size()) return 0; else return m_text.at(m_ptr+n); } string Scanner::Context() { return Context(ContextNum()); } string stringFromNumber(unsigned line) { char buffer[1000]; sprintf(buffer,"%d",line); return string(buffer); } string Scanner::Snippet(unsigned pos1, unsigned pos2) { unsigned ptr1 = pos1 >> 16; unsigned ptr2 = pos2 >> 16; return string(m_text,ptr1,ptr2-ptr1+1); } string Scanner::Context(unsigned pos) { pos = pos >> 16; string::size_type line_start = 0; int linenumber = 1; string::size_type line_stop = m_text.find("\n"); string prevline; while (pos > line_stop) { prevline = string(m_text,line_start,line_stop-line_start); line_start = line_stop+1; line_stop = m_text.find("\n",line_start); linenumber++; } string retstring; if (m_filename.size() > 0) { retstring = " at line number: " + stringFromNumber(linenumber); retstring += " of 
file " + m_filename + "\n"; } else retstring += "\n"; retstring += " " + prevline + "\n"; retstring += " " + string(m_text,line_start,line_stop-line_start); int offset = pos-line_start-1; if (offset < 0) offset = 0; retstring += "\n " + string(offset,' ') + "^"; return(retstring); }