/***************************************************************************
* Copyright (C) 2004 by Tomas Mecir *
* kmuddy@kmuddy.org *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU Library General Public License as *
* published by the Free Software Foundation; either version 2 of the *
* License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Library General Public License for more details. *
***************************************************************************/
#include "cmxpparser.h"
#include "celementmanager.h"
#include "cmxpstate.h"
#include "cresulthandler.h"
/**
 * Create a parser.
 * The three pointers are only stored; the parser starts in the plain-text
 * state with no pending carriage return.
 * @param st  object that receives text, new-line and line-tag notifications
 * @param elm object that receives tags, new-line and line-tag notifications
 * @param res object used to create and collect error results
 */
cMXPParser::cMXPParser (cMXPState *st, cElementManager *elm, cResultHandler *res)
  : pstate (pText),
    wasBackslashR (false),
    state (st),
    elements (elm),
    results (res)
{
  //everything is set up in the initializer list
}
/**
 * Destructor. Nothing is released here - the pointers handed to the
 * constructor are not deleted by this class.
 */
cMXPParser::~cMXPParser () {}
/**
 * Return the parser to its initial condition: drop all queued chunks and any
 * partially collected text, go back to the plain-text state and forget
 * a pending carriage return.
 */
void cMXPParser::reset ()
{
  chunks.clear();
  str.clear();
  wasBackslashR = false;
  pstate = pText;
}
void cMXPParser::parse (const string &text)
{
//WARNING: examine this function only at your own risk!
//it is advised to have a look at the simpleParse() function first - it's similar
//to this one, but much simpler...
if (text.empty())
return;
string::const_iterator it;
for (it = text.begin(); it != text.end(); ++it)
{
char c = *it;
//handle \r not followed by \n - treated as a newline
if (wasBackslashR && (c != '\n'))
{
//"str" now certainly is empty, so we needn't care about that
//report new-line
elements->gotNewLine();
state->gotNewLine();
}
wasBackslashR = false;
//we need current mode - parsing in LOCKED mode is limited
//mode is retrieved in every iteration to ensure that it's always up-to-date
mxpMode mode = state->getMXPMode();
switch (pstate) {
case pText: {
//tags not recognized in LOCKED mode...
if ((c == '\e') || ((mode != lockedMode) && (c == '<')) || (c == '\n') || (c == '\r'))
{
//end of text - got newline / ANSI seq / start of tag
if (!str.empty())
{
state->gotText (str);
str = "";
}
if (c == '\e')
pstate = pAnsiSeq;
if ((c == '<') && (mode != lockedMode))
pstate = pTag;
if (c == '\n')
{
//report new-line
elements->gotNewLine();
state->gotNewLine();
}
if (c == '\r')
wasBackslashR = true;
}
else
str += c; //add new character to the text...
break;
};
case pAnsiSeq: {
if ((c == '\e') || (c == '\n') || (c == '\r'))
{
//the same as in pTag section...
results->addToList (results->createError ("Received unfinished ANSI sequence!"));
str = "";
if (c == '\e')
pstate = pAnsiSeq;
if (c == '\n')
{
//report new-line
elements->gotNewLine();
state->gotNewLine();
pstate = pText;
}
if (c == '\r')
{
pstate = pText;
wasBackslashR = true;
}
}
else
if (((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')))
{
//ANSI sequence ends...
if (c == 'z') //line tag
{
if (str.empty())
{
//invalid sequence
str = "\ez";
}
else
{
//process this sequence
int len = str.length();
int num = 0;
for (int i = 1; i < len; i++) //str[0] is '[', which is SKIPPED
{
char cc = str[i];
if (cc == ';') //this shouldn't happen, but some MUD might want to use it...
{
if ((num >= 0) && (num <= 99)) //ensure that number lies in correct range
{
state->gotLineTag (num);
elements->gotLineTag (num);
}
else
results->addToList (results->createError ("Received invalid line tag!"));
num = 0;
}
else
num = num * 10 + (cc - 48); //48 is the code of '0'
}
//report last line tag (and usually the only one)
if ((num >= 0) && (num <= 99)) //ensure that number lies in correct range
{
state->gotLineTag (num);
elements->gotLineTag (num);
}
else
results->addToList (results->createError ("Received invalid line tag!"));
str = "";
}
}
else //something else
{
//'\e' and c are not in the string - add them there
str = '\e' + str + c;
}
pstate = pText;
}
else
if (c == '[') //this one is valid, but only at the beginning
{
if (str.empty())
str += c;
else
{
//'[' in the middle of ANSI seq => not an ANSI seq...
pstate = pText;
str = '\e' + str + c;
}
}
else
if ((c == ';') || ((c >= '0') && (c <= '9'))) //correct char, unless str is empty
if (!str.empty())
str += c; //here we go...
else
{
//ANSI seq must start with [ - therefore this is not an ANSI sequence after all
pstate = pText;
str += '\e';
str += c;
}
else
//incorrect character...
{
str = '\e' + str + c;
pstate = pText;
}
break;
};
case pTag: {
if (c == '>')
{
elements->gotTag (str);
str = "";
pstate = pText;
}
else
if ((c == '"') || (c == '\''))
{
pstate = pQuotedParam;
quoteChar = c;
str += c;
}
else if ((c == '\e') || (c == '\n') || (c == '\r'))
{
//handle incorrectly terminated tag and continue parsing...
results->addToList (results->createError ("Received unfinished tag <" + str));
str = "";
if (c == '\e')
pstate = pAnsiSeq;
if (c == '\n')
{
//report new-line
elements->gotNewLine();
state->gotNewLine();
pstate = pText;
}
if (c == '\r')
{
pstate = pText;
wasBackslashR = true;
}
}
else if (str == "!--") //comment
{
str += c;
pstate = pComment;
}
else
str += c;
break;
};
case pComment: {
if (c == '>')
{
int l = str.length();
if ((str[l-2] == '-') && (str[l-1] == '-')) //okay, comment ends
{
str = "";
pstate = pText;
}
else
str += c;
}
else if ((c == '\e') || (c == '\n') || (c == '\r'))
{
//handle incorrectly terminated comment and continue parsing...
results->addToList (results->createError ("Received an unfinished comment!"));
str = "";
if (c == '\e')
pstate = pAnsiSeq;
if (c == '\n')
{
//report new-line
elements->gotNewLine();
state->gotNewLine();
pstate = pText;
}
if (c == '\r')
{
pstate = pText;
wasBackslashR = true;
}
}
else
str += c;
break;
};
case pQuotedParam: {
if (c == quoteChar)
{
//quoted parameter ends... this simple approach will work correctly for correct
//tags, it may treat incorrect quotes as correct, but element manager will take care
//of that
pstate = pTag;
str += c;
}
else
if ((c == '\e') || (c == '\n') || (c == '\r'))
{
//the same as in pTag section...
results->addToList (results->createError ("Received unfinished tag <" + str));
str = "";
if (c == '\e')
pstate = pAnsiSeq;
if (c == '\n')
{
//report new-line
elements->gotNewLine();
state->gotNewLine();
pstate = pText;
}
if (c == '\r')
{
pstate = pText;
wasBackslashR = true;
}
}
else
str += c;
break;
};
};
}
//report remaining text, if any (needed to improve speed of text displaying and to handle
//prompts correctly)
if ((pstate == pText) && (!str.empty()))
{
state->gotText (str);
str = "";
}
}
void cMXPParser::simpleParse (const string &text)
//simple parsing - only text and tags - no newlines, no ANSI sequences, no line tags
//simpler version of parse() function above
{
if (text.empty())
return;
chunk ch;
string::const_iterator it;
pstate = pText;
str = "";
for (it = text.begin(); it != text.end(); ++it)
{
char c = *it;
switch (pstate) {
case pText: {
if (c == '<')
{
//end of text - got start of tag
if (!str.empty())
{
ch.chk = chunkText;
ch.text = str;
chunks.push_back (ch);
str = "";
}
pstate = pTag;
}
else
str += c; //add new character to the text...
break;
};
case pTag: {
if (c == '>')
{
ch.chk = chunkTag;
ch.text = str;
chunks.push_back (ch);
str = "";
pstate = pText;
}
else
if ((c == '"') || (c == '\''))
{
pstate = pQuotedParam;
quoteChar = c;
str += c;
}
else
str += c;
break;
};
case pQuotedParam: {
if (c == quoteChar)
{
//quoted parameter ends... this simple approach will work correctly for correct
//tags, it may treat incorrect quotes as correct, but element manager will take care
//of that
pstate = pTag;
str += c;
}
else
str += c;
break;
};
};
}
//unfinished things...
if (pstate == pText)
{
ch.chk = chunkText;
ch.text = str;
chunks.push_back (ch);
}
if ((pstate == pTag) || (pstate == pQuotedParam))
{
ch.chk = chunkError;
ch.text = "Tag definition contains unfinished tag <" + str;
chunks.push_back (ch);
}
str = "";
}
/**
 * Is there any chunk waiting in the queue?
 * @return true if getNext() would return a queued chunk
 */
bool cMXPParser::hasNext()
{
  return !chunks.empty();
}
/**
 * Remove the oldest chunk from the queue and return it.
 * @return the first queued chunk; if the queue is empty, a chunk whose
 * type is chunkNone
 */
chunk cMXPParser::getNext()
{
  if (hasNext())
  {
    chunk head = chunks.front();
    chunks.pop_front();
    return head;
  }

  //nothing is queued
  chunk none;
  none.chk = chunkNone;
  return none;
}
// syntax highlighted by Code2HTML, v. 0.9.1